From 77f0b20a8612647359a2c6cb0334eceb73a5cba8 Mon Sep 17 00:00:00 2001 From: ClaroHenrique Date: Mon, 31 Oct 2022 23:15:19 -0300 Subject: [PATCH 01/20] Refact imfilter code --- src/ImageQuilting.jl | 18 +-- src/imfilter.default.jl | 17 --- src/imfilter.opencl.jl | 111 ------------------ .../imfilter_cuda.jl} | 5 +- src/kernel/imfilter_default.jl | 17 +++ src/kernel/imfilter_opencl.jl | 107 +++++++++++++++++ src/test_imfilter.jl | 46 -------- src/utils_gpu.jl | 93 --------------- 8 files changed, 131 insertions(+), 283 deletions(-) delete mode 100644 src/imfilter.default.jl delete mode 100644 src/imfilter.opencl.jl rename src/{imfilter.cuda.jl => kernel/imfilter_cuda.jl} (97%) create mode 100644 src/kernel/imfilter_default.jl create mode 100644 src/kernel/imfilter_opencl.jl delete mode 100644 src/test_imfilter.jl delete mode 100644 src/utils_gpu.jl diff --git a/src/ImageQuilting.jl b/src/ImageQuilting.jl index 4afbcece..82fe122a 100644 --- a/src/ImageQuilting.jl +++ b/src/ImageQuilting.jl @@ -17,23 +17,19 @@ using ProgressMeter: Progress, next! using FFTW: set_num_threads using CpuId: cpucores using RecipesBase -using Primes using CUDA using OpenCL using CLFFT -const clfft = CLFFT +using PlatformAware using Base: @nexprs, @nloops, @nref using SparseArrays: spzeros using Statistics: mean, std using Random -using PlatformAware - import GeoStatsBase: preprocess, solvesingle include("utils.jl") -include("utils_gpu.jl") include("plotrecipes.jl") include("relaxation.jl") include("taumodel.jl") @@ -42,14 +38,10 @@ include("iqsim.jl") include("voxelreuse.jl") include("geostats.jl") -include("test_imfilter.jl") - -function __init__() - include(pkgdir(@__MODULE__) * "/src/kernels.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.default.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.cuda.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.opencl.jl") -end +include("kernels.jl") +include("kernel/imfilter_default.jl") +include("kernel/imfilter_cuda.jl") +include("kernel/imfilter_opencl.jl") export # functions diff --git a/src/imfilter.default.jl b/src/imfilter.default.jl deleted file mode 100644 index ea125534..00000000 --- a/src/imfilter.default.jl +++ /dev/null @@ -1,17 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -@platform default function init_imfilter_kernel() - println("Running on DEFAULT PLATFORM") -end - -@platform default function array_kernel(array) array end - -@platform default function view_kernel(array, I) view(array, I) end - -@platform default function imfilter_kernel(img, krn) - imfilter(img, centered(krn), Inner(), Algorithm.FFT()) -end - - diff --git a/src/imfilter.opencl.jl b/src/imfilter.opencl.jl deleted file mode 100644 index c9c1f005..00000000 --- a/src/imfilter.opencl.jl +++ /dev/null @@ -1,111 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}) - println("Running on OpenCL GPU") - global GPU = gpu_setup() -end - -@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, array) array end - -@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, array, I) view(array, I) end - -@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, img, kern) - imfilter_opencl(img,kern) -end - - -function imfilter_opencl(img, kern) - # retrieve basic info - N = ndims(img) - T = ComplexF64 - - # GPU metadata - ctx = GPU.ctx; queue = GPU.queue - mult_kernel = GPU.mult_kernel - - # operations with complex type - img = T.(img) - kern = T.(kern) - - # kernel may require padding - prepad = ntuple(d->(size(kern,d)-1) ÷ 2, N) - postpad = ntuple(d->(size(kern,d) ) ÷ 2, N) - - # OpenCL FFT expects powers of 2, 3, 5, 7, 11 or 13 - clpad = clfftpad(img) - A = padarray(img, Pad(:symmetric, zeros(Int, ndims(img)), clpad)) - A = parent(A) - - krn = zeros(T, size(A)) - indexesK = ntuple(d->[size(A,d)-prepad[d]+1:size(A,d);1:size(kern,d)-prepad[d]], N) - krn[indexesK...] = reflect(kern) - - # plan FFT - p = clfft.Plan(T, ctx, size(A)) - clfft.set_layout!(p, :interleaved, :interleaved) - clfft.set_result!(p, :inplace) - clfft.bake!(p, queue) - - # populate GPU memory - bufA = cl.Buffer(T, ctx, :copy, hostbuf=A) - bufkrn = cl.Buffer(T, ctx, :copy, hostbuf=krn) - bufRES = cl.Buffer(T, ctx, length(A)) - - # compute ifft(fft(A).*fft(kern)) - clfft.enqueue_transform(p, :forward, [queue], bufA, nothing) - clfft.enqueue_transform(p, :forward, [queue], bufkrn, nothing) - queue(mult_kernel, length(A), nothing, bufA, bufkrn, bufRES) - - clfft.enqueue_transform(p, :backward, [queue], bufRES, nothing) - - # get result back - AF = reshape(cl.read(queue, bufRES), size(A)) - - # undo OpenCL FFT paddings - AF = view(AF, ntuple(d->1:size(AF,d)-clpad[d], N)...) - - out = Array{realtype(eltype(AF))}(undef, ntuple(d->size(img,d) - prepad[d] - postpad[d], N)...) - indexesA = ntuple(d->postpad[d]+1:size(img,d)-prepad[d], N) - copyreal!(out, AF, indexesA) - - out - - end - - @generated function reflect(A::AbstractArray{T,N}) where {T,N} - quote - B = Array{T,N}(undef,size(A)) - @nexprs $N d->(n_d = size(A, d)+1) - @nloops $N i A d->(j_d = n_d - i_d) begin - @nref($N, B, j) = @nref($N, A, i) - end - B - end - end - - -for N = 1:5 - @eval begin - function copyreal!(dst::Array{T,$N}, src, I::Tuple{Vararg{UnitRange{Int}}}) where {T<:Real} - @nexprs $N d->(I_d = I[d]) - @nloops $N i dst d->(j_d = first(I_d)+i_d-1) begin - (@nref $N dst i) = real(@nref $N src j) - end - dst - end - function copyreal!(dst::Array{T,$N}, src, I::Tuple{Vararg{UnitRange{Int}}}) where {T<:Complex} - @nexprs $N d->I_d = I[d] - @nloops $N i dst d->(j_d = first(I_d)+i_d-1) begin - (@nref $N dst i) = @nref $N src j - end - dst - end - end -end - -realtype(::Type{R}) where {R<:Real} = R -realtype(::Type{Complex{R}}) where {R<:Real} = R - - diff --git a/src/imfilter.cuda.jl b/src/kernel/imfilter_cuda.jl similarity index 97% rename from src/imfilter.cuda.jl rename to src/kernel/imfilter_cuda.jl index d4cc852a..4700f0d7 100644 --- a/src/imfilter.cuda.jl +++ b/src/kernel/imfilter_cuda.jl @@ -11,9 +11,8 @@ end @platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, array, I) Array(array[I]) end @platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, img, krn) - imfilter_cuda(img,krn) - end - + imfilter_cuda(img,krn) +end function imfilter_cuda(img, krn) diff --git a/src/kernel/imfilter_default.jl b/src/kernel/imfilter_default.jl new file mode 100644 index 00000000..623e2a31 --- /dev/null +++ b/src/kernel/imfilter_default.jl @@ -0,0 +1,17 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENCE in the project root. +# ------------------------------------------------------------------ + +# @platform default function init_imfilter_kernel() +# println("Running on DEFAULT PLATFORM") +# end + +# @platform default function array_kernel(array) array end + +# @platform default function view_kernel(array, I) view(array, I) end + +# @platform default function imfilter_kernel(img, krn) +# imfilter(img, centered(krn), Inner(), Algorithm.FFT()) +# end + + diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl new file mode 100644 index 00000000..06c9a535 --- /dev/null +++ b/src/kernel/imfilter_opencl.jl @@ -0,0 +1,107 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENCE in the project root. +# ------------------------------------------------------------------ + +#@platform aware function init_imfilter_kernel({accelerator_api::OpenCL_API}) +@platform default function init_imfilter_kernel() + println("Running on OpenCL device") + global cl_device, cl_ctx, cl_queue = cl.create_compute_context() +end + +#@platform aware function array_kernel({accelerator_api::OpenCL_API}, array) array end +@platform default function array_kernel(array) array end + +#@platform aware function view_kernel({accelerator_api::OpenCL_API}, array, I) view(array, I) end +@platform default function view_kernel(array, I) view(array, I) end + +#@platform aware function imfilter_kernel({accelerator_api::OpenCL_API}, img, kern) +@platform default function imfilter_kernel(img, krn) + imfilter_opencl(img, krn) +end + +function imfilter_opencl(img, krn) + # retrieve basic info + T = ComplexF64 + + # retrieve global OpenCL info + ctx, queue = cl_ctx, cl_queue + + # build OpenCL program kernels + conj_kernel = build_conj_kernel(ctx) + mult_kernel = build_mult_kernel(ctx) + + # pad img to support CLFFT operations + padimg = pad_opencl_img(img) + + # pad krn to common size with padimg + padkrn = zeros(eltype(img), size(padimg)) + padkrn[CartesianIndices(krn)] = krn + + # convert to Complex + fftimg = T.(padimg) + fftkrn = T.(padkrn) + + # OpenCl setup + plan = CLFFT.Plan(T, ctx, size(fftimg)) + CLFFT.set_layout!(plan, :interleaved, :interleaved) + CLFFT.set_result!(plan, :inplace) + CLFFT.bake!(plan, queue) + + # populate device memory + bufimg = cl.Buffer(T, ctx, :copy, hostbuf=fftimg) + bufkrn = cl.Buffer(T, ctx, :copy, hostbuf=fftkrn) + bufres = cl.Buffer(T, ctx, :w, length(fftimg)) + + # transform img and krn to FFT representation + CLFFT.enqueue_transform(plan, :forward, [queue], bufimg, nothing) + CLFFT.enqueue_transform(plan, :forward, [queue], bufkrn, nothing) + + # compute ifft(fft(A).*conj.(fft(krn))) + queue(conj_kernel, length(fftimg), nothing, bufkrn) + queue(mult_kernel, length(fftimg), nothing, bufimg, bufkrn, bufres) + CLFFT.enqueue_transform(plan, :backward, [queue], bufres, nothing) + + # recover result + result = reshape(cl.read(queue, bufres), size(fftimg)) + real_result = real.(result) + + finalsize = size(img) .- (size(krn) .- 1) + real_result[CartesianIndices(finalsize)] +end + +function build_mult_kernel(ctx) + mult_kernel = " + __kernel void mult(__global const double2 *a, + __global const double2 *b, + __global double2 *c) + { + int gid = get_global_id(0); + c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; + c[gid].y = a[gid].x*b[gid].y + a[gid].y*b[gid].x; + } + " + prog = cl.Program(ctx, source=mult_kernel) |> cl.build! + cl.Kernel(prog, "mult") +end + +function build_conj_kernel(ctx) + conj_kernel = " + __kernel void conj(__global double2 *a) + { + int gid = get_global_id(0); + a[gid].y = -a[gid].y; + } + " + prog = cl.Program(ctx, source=conj_kernel) |> cl.build! + cl.Kernel(prog, "conj") +end + +function pad_opencl_img(img) + # OpenCL FFT expects products of powers of 2, 3, 5, 7, 11 or 13 + radices = CLFFT.supported_radices() + newsize = map(dim -> nextprod(radices, dim), size(img)) + + padimg = zeros(eltype(img), newsize) + padimg[CartesianIndices(img)] = img + padimg +end diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl deleted file mode 100644 index 5f46d24c..00000000 --- a/src/test_imfilter.jl +++ /dev/null @@ -1,46 +0,0 @@ -using BenchmarkTools - -const TOLERANCE = 1e-3 - -function allclose(x::AbstractArray{T}, y::AbstractArray{T}; rtol=1e-5, atol=1e-8) where {T} - @assert length(x) == length(y) - @inbounds begin - for i in 1:length(x) - xx, yy = x[i], y[i] - if !(isapprox(xx, yy; rtol=rtol, atol=atol)) - return false - end - end - end - return true -end - -function test_imfilter(N) - - @info "start" - - img = rand(Float64,(N,N,N)) - krn = rand(Float64,(10,10,10)) - - X = Int(round(N/2)) - - @info "FIRST RUN - GPU - CUDA" - r1 = @btime imfilter_cuda(CuArray($img), $krn) - @info "check = ", r1[X,X,X] - - @info("SECOND RUN - GPU - OpenCL") - global GPU = gpu_setup() - r2 = @btime imfilter_opencl($img, $krn) - @info "check = ", r2[X,X,X] - - @info "THIRD RUN - CPU - ImageFiltering" - r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) - @info "check = ", r3[X,X,X] - - @info "r1 & r2", allclose(r1, r2; rtol=1e-2, atol=1e-3) - @info "r1 & r3", allclose(r1, r3; rtol=1e-2, atol=1e-3) - @info "r2 & r3", allclose(r2, r3; rtol=1e-2, atol=1e-3) - - @info "end" - -end \ No newline at end of file diff --git a/src/utils_gpu.jl b/src/utils_gpu.jl deleted file mode 100644 index 3086feca..00000000 --- a/src/utils_gpu.jl +++ /dev/null @@ -1,93 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -struct GPUmeta - dev - ctx - queue - mult_kernel -end - -function gpu_setup() - @assert cl ≠ nothing "OpenCL.jl not installed, cannot use GPU" - @assert clfft ≠ nothing "CLFFT.jl not installed, cannot use GPU" - - devs = cl.devices(:gpu) - - if isempty(devs) - @warn "GPU not found, falling back to other OpenCL devices" - devs = cl.devices() - end - @assert !isempty(devs) "OpenCL device not found, make sure drivers are installed" - - dev = [] - devnames = map(d -> d[:platform][:name], devs) - for vendor in ["NVIDIA","AMD","Intel"], (idx,name) in enumerate(devnames) - if occursin(vendor, name) - dev = devs[idx] - break - end - end - - devtype = uppercase(string(dev[:device_type])) - devname = dev[:name] - - @info "using $devtype $devname" - - ctx = cl.Context(dev) - queue = cl.CmdQueue(ctx) - mult_kernel = basic_kernels(ctx) - - GPUmeta(dev, ctx, queue, mult_kernel) -end - -function basic_kernels(ctx) - mult_kernel = " - __kernel void mult(__global const double2 *a, - __global const double2 *b, - __global double2 *c) - { - int gid = get_global_id(0); - c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; - c[gid].y = a[gid].x*b[gid].y + a[gid].y*b[gid].x; - } - " - - # build OpenCL program - prog = cl.Program(ctx, source=mult_kernel) |> cl.build! - - cl.Kernel(prog, "mult") -end - - - -function clfftpad(A::AbstractArray) - # clFFT releases support powers of 2, 3, 5, ... - radices = [2,3,5] - v = clfft.version() - v ≥ v"2.8.0" && push!(radices, 7) - v ≥ v"2.12.0" && push!(radices, 11, 13) - - result = Int[] - for s in size(A) - fs = keys(factor(s)) - if fs ⊆ radices - push!(result, 0) - else - # Try a closer number that has prime factors of 2 and 3. - # Use the next power of 2 (say N) to get multiple new - # candidates. - N = nextpow(2,s) - - # fractions of N: 100%, 93%, 84%, 75%, 56% - candidates = [N, 15(N÷16), 27(N÷32), 3(N÷4), 9(N÷16)] - candidates = candidates[candidates .> s] - n = minimum(candidates) - - push!(result, n - s) - end - end - - result -end From a1087b6113da643d47c1e0a39e8444c6eafa96bb Mon Sep 17 00:00:00 2001 From: ClaroHenrique Date: Tue, 1 Nov 2022 14:43:57 -0300 Subject: [PATCH 02/20] Update PlatformAware syntax --- src/kernel/imfilter_default.jl | 19 +++++++++++-------- src/kernel/imfilter_opencl.jl | 12 ++++-------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/kernel/imfilter_default.jl b/src/kernel/imfilter_default.jl index 623e2a31..c270622e 100644 --- a/src/kernel/imfilter_default.jl +++ b/src/kernel/imfilter_default.jl @@ -2,16 +2,19 @@ # Licensed under the MIT License. See LICENCE in the project root. # ------------------------------------------------------------------ -# @platform default function init_imfilter_kernel() -# println("Running on DEFAULT PLATFORM") -# end +@platform default function init_imfilter_kernel() + println("Running on DEFAULT PLATFORM") +end -# @platform default function array_kernel(array) array end +@platform default function array_kernel(array) array end -# @platform default function view_kernel(array, I) view(array, I) end +@platform default function view_kernel(array, I) view(array, I) end -# @platform default function imfilter_kernel(img, krn) -# imfilter(img, centered(krn), Inner(), Algorithm.FFT()) -# end +@platform default function imfilter_kernel(img, krn) + imfilter_cpu(img, krn) +end +function imfilter_cpu(img, krn) + imfilter(img, centered(krn), Inner(), Algorithm.FFT()) +end diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 06c9a535..b5ad98a0 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -2,20 +2,16 @@ # Licensed under the MIT License. See LICENCE in the project root. # ------------------------------------------------------------------ -#@platform aware function init_imfilter_kernel({accelerator_api::OpenCL_API}) -@platform default function init_imfilter_kernel() +@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}) println("Running on OpenCL device") global cl_device, cl_ctx, cl_queue = cl.create_compute_context() end -#@platform aware function array_kernel({accelerator_api::OpenCL_API}, array) array end -@platform default function array_kernel(array) array end +@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) array end -#@platform aware function view_kernel({accelerator_api::OpenCL_API}, array, I) view(array, I) end -@platform default function view_kernel(array, I) view(array, I) end +@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array, I) view(array, I) end -#@platform aware function imfilter_kernel({accelerator_api::OpenCL_API}, img, kern) -@platform default function imfilter_kernel(img, krn) +@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, img, krn) imfilter_opencl(img, krn) end From a24757234f3a39e20e83e679f89c0133f1122761 Mon Sep 17 00:00:00 2001 From: ClaroHenrique Date: Wed, 2 Nov 2022 11:16:24 -0300 Subject: [PATCH 03/20] Build OpenCL context locally --- src/kernel/imfilter_cuda.jl | 2 -- src/kernel/imfilter_default.jl | 1 - src/kernel/imfilter_opencl.jl | 3 +-- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/kernel/imfilter_cuda.jl b/src/kernel/imfilter_cuda.jl index 4700f0d7..0ec82886 100644 --- a/src/kernel/imfilter_cuda.jl +++ b/src/kernel/imfilter_cuda.jl @@ -28,6 +28,4 @@ function imfilter_cuda(img, krn) # recover result finalsize = size(img) .- (size(krn) .- 1) real.(result[CartesianIndices(finalsize)]) |> Array - end - diff --git a/src/kernel/imfilter_default.jl b/src/kernel/imfilter_default.jl index c270622e..a998904d 100644 --- a/src/kernel/imfilter_default.jl +++ b/src/kernel/imfilter_default.jl @@ -17,4 +17,3 @@ end function imfilter_cpu(img, krn) imfilter(img, centered(krn), Inner(), Algorithm.FFT()) end - diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index b5ad98a0..460d1011 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -4,7 +4,6 @@ @platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}) println("Running on OpenCL device") - global cl_device, cl_ctx, cl_queue = cl.create_compute_context() end @platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) array end @@ -20,7 +19,7 @@ function imfilter_opencl(img, krn) T = ComplexF64 # retrieve global OpenCL info - ctx, queue = cl_ctx, cl_queue + device, ctx, queue = cl.create_compute_context() # build OpenCL program kernels conj_kernel = build_conj_kernel(ctx) From df8479506cf0c50349fc9ab9dcfce48790f67813 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Thu, 3 Nov 2022 11:05:38 -0400 Subject: [PATCH 04/20] adjust in CUDA kernel --- src/ImageQuilting.jl | 11 +++++++---- src/kernel/imfilter_cuda.jl | 18 +++++++++++++----- src/kernel/imfilter_default.jl | 4 ++-- src/kernel/imfilter_opencl.jl | 4 ++-- src/kernels.jl | 1 + 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/ImageQuilting.jl b/src/ImageQuilting.jl index 82fe122a..d6f8b01f 100644 --- a/src/ImageQuilting.jl +++ b/src/ImageQuilting.jl @@ -17,6 +17,7 @@ using ProgressMeter: Progress, next! using FFTW: set_num_threads using CpuId: cpucores using RecipesBase + using CUDA using OpenCL using CLFFT @@ -38,10 +39,12 @@ include("iqsim.jl") include("voxelreuse.jl") include("geostats.jl") -include("kernels.jl") -include("kernel/imfilter_default.jl") -include("kernel/imfilter_cuda.jl") -include("kernel/imfilter_opencl.jl") +function __init__() + include("src/kernels.jl") + include("src/kernel/imfilter_default.jl") + include("src/kernel/imfilter_cuda.jl") + include("src/kernel/imfilter_opencl.jl") +end export # functions diff --git a/src/kernel/imfilter_cuda.jl b/src/kernel/imfilter_cuda.jl index 0ec82886..f75f42cf 100644 --- a/src/kernel/imfilter_cuda.jl +++ b/src/kernel/imfilter_cuda.jl @@ -2,15 +2,23 @@ # Licensed under the MIT License. See LICENCE in the project root. # ------------------------------------------------------------------ -@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}) - println("Running on CUDA GPU") +@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}) + println("Running on NVIDIA/CUDA GPU") end -@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, array) CuArray(array) end +@platform aware array_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, array) = CuArray(array) -@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, array, I) Array(array[I]) end +@platform aware view_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, array, I) = Array(array[I]) -@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, img, krn) +@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, img, krn) imfilter_cuda(img,krn) end diff --git a/src/kernel/imfilter_default.jl b/src/kernel/imfilter_default.jl index a998904d..24792e6b 100644 --- a/src/kernel/imfilter_default.jl +++ b/src/kernel/imfilter_default.jl @@ -6,9 +6,9 @@ println("Running on DEFAULT PLATFORM") end -@platform default function array_kernel(array) array end +@platform default array_kernel(array) = array -@platform default function view_kernel(array, I) view(array, I) end +@platform default view_kernel(array, I) = view(array, I) @platform default function imfilter_kernel(img, krn) imfilter_cpu(img, krn) diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 460d1011..3541b958 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -6,9 +6,9 @@ println("Running on OpenCL device") end -@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) array end +@platform aware array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) = array -@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array, I) view(array, I) end +@platform aware view_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array, I) = view(array, I) @platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, img, krn) imfilter_opencl(img, krn) diff --git a/src/kernels.jl b/src/kernels.jl index e51bbe79..a47debe8 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -4,3 +4,4 @@ @platform parameter clear @platform parameter accelerator_count @platform parameter accelerator_api +@platform parameter accelerator_manufacturer From 5d4200bdb96c6de122d19d6c55ce8999184fa8f9 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Thu, 3 Nov 2022 16:47:20 -0400 Subject: [PATCH 05/20] adjusts in CUDA kernel --- src/ImageQuilting.jl | 2 ++ src/kernel/imfilter_opencl.jl | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/ImageQuilting.jl b/src/ImageQuilting.jl index d6f8b01f..f26b79b1 100644 --- a/src/ImageQuilting.jl +++ b/src/ImageQuilting.jl @@ -46,6 +46,8 @@ function __init__() include("src/kernel/imfilter_opencl.jl") end +include("test_imfilter.jl") + export # functions iqsim, diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 3541b958..5059a17c 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -61,6 +61,9 @@ function imfilter_opencl(img, krn) real_result = real.(result) finalsize = size(img) .- (size(krn) .- 1) + + GC.gc() + real_result[CartesianIndices(finalsize)] end From 7b9f1221fc3a12ec8277785db4bfb4318ddcae8f Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Thu, 3 Nov 2022 17:02:01 -0400 Subject: [PATCH 06/20] adding testing file --- src/test_imfilter.jl | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/test_imfilter.jl diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl new file mode 100644 index 00000000..12b441b2 --- /dev/null +++ b/src/test_imfilter.jl @@ -0,0 +1,46 @@ +using BenchmarkTools + +const TOLERANCE = 1e-3 + +function allclose(x::AbstractArray{T}, y::AbstractArray{T}; rtol=1e-5, atol=1e-8) where {T} + @assert length(x) == length(y) + @inbounds begin + for i in 1:length(x) + xx, yy = x[i], y[i] + if !(isapprox(xx, yy; rtol=rtol, atol=atol)) + return false + end + end + end + return true +end + +function test_imfilter(N) + + @info "start" + + img = rand(Float64,(N,N,N)) + krn = rand(Float64,(10,10,10)) + + X = Int(round(N/2)) + + @info "FIRST RUN - GPU - CUDA" + r1 = @btime imfilter_cuda(CuArray($img), $krn) + @info "check = ", r1[X,X,X] + + @info("SECOND RUN - GPU - OpenCL") + #global GPU = gpu_setup() + r2 = @btime imfilter_opencl($img, $krn) + @info "check = ", r2[X,X,X] + + @info "THIRD RUN - CPU - ImageFiltering" + r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) + @info "check = ", r3[X,X,X] + + @info "r1 & r2", allclose(r1, r2; rtol=1e-2, atol=1e-3) + @info "r1 & r3", allclose(r1, r3; rtol=1e-2, atol=1e-3) + @info "r2 & r3", allclose(r2, r3; rtol=1e-2, atol=1e-3) + + @info "end" + +end \ No newline at end of file From 806696f1852fd82b02deec2fa0acf69110328a48 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Thu, 3 Nov 2022 17:13:25 -0400 Subject: [PATCH 07/20] free resources --- src/kernel/imfilter_opencl.jl | 2 -- src/test_imfilter.jl | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 5059a17c..088219b9 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -62,8 +62,6 @@ function imfilter_opencl(img, krn) finalsize = size(img) .- (size(krn) .- 1) - GC.gc() - real_result[CartesianIndices(finalsize)] end diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl index 12b441b2..691bb3f2 100644 --- a/src/test_imfilter.jl +++ b/src/test_imfilter.jl @@ -33,6 +33,9 @@ function test_imfilter(N) r2 = @btime imfilter_opencl($img, $krn) @info "check = ", r2[X,X,X] + CLFFT.api.clfftTeardown() + GC.gc() + @info "THIRD RUN - CPU - ImageFiltering" r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) @info "check = ", r3[X,X,X] From 1a3c575e060b371ec565ab04689cceee9bb5c6ef Mon Sep 17 00:00:00 2001 From: ClaroHenrique Date: Fri, 4 Nov 2022 01:09:36 +0000 Subject: [PATCH 08/20] Use OpenCL imfilter with single precision floats --- src/kernel/imfilter_opencl.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 088219b9..f525a888 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -16,7 +16,7 @@ end function imfilter_opencl(img, krn) # retrieve basic info - T = ComplexF64 + T = ComplexF32 # retrieve global OpenCL info device, ctx, queue = cl.create_compute_context() @@ -67,9 +67,9 @@ end function build_mult_kernel(ctx) mult_kernel = " - __kernel void mult(__global const double2 *a, - __global const double2 *b, - __global double2 *c) + __kernel void mult(__global const float2 *a, + __global const float2 *b, + __global float2 *c) { int gid = get_global_id(0); c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; @@ -82,7 +82,7 @@ end function build_conj_kernel(ctx) conj_kernel = " - __kernel void conj(__global double2 *a) + __kernel void conj(__global float2 *a) { int gid = get_global_id(0); a[gid].y = -a[gid].y; From 07f421d573959be8d47f3b43100740d12f69a3d5 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Thu, 3 Nov 2022 21:43:26 -0400 Subject: [PATCH 09/20] opencl context outside kernel call --- src/kernel/imfilter_cuda.jl | 2 +- src/kernel/imfilter_default.jl | 2 +- src/kernel/imfilter_opencl.jl | 26 +++++++++++++++++--------- src/test_imfilter.jl | 8 +++----- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/kernel/imfilter_cuda.jl b/src/kernel/imfilter_cuda.jl index f75f42cf..910f6193 100644 --- a/src/kernel/imfilter_cuda.jl +++ b/src/kernel/imfilter_cuda.jl @@ -5,7 +5,7 @@ @platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_manufacturer::NVIDIA, accelerator_api::(@api CUDA)}) - println("Running on NVIDIA/CUDA GPU") + @info "Running on NVIDIA/CUDA GPU" end @platform aware array_kernel({accelerator_count::(@atleast 1), diff --git a/src/kernel/imfilter_default.jl b/src/kernel/imfilter_default.jl index 24792e6b..bc67755d 100644 --- a/src/kernel/imfilter_default.jl +++ b/src/kernel/imfilter_default.jl @@ -3,7 +3,7 @@ # ------------------------------------------------------------------ @platform default function init_imfilter_kernel() - println("Running on DEFAULT PLATFORM") + @info "Running on DEFAULT PLATFORM" end @platform default array_kernel(array) = array diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 088219b9..9077f022 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -3,7 +3,16 @@ # ------------------------------------------------------------------ @platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}) - println("Running on OpenCL device") + @info "Running on OpenCL device" + init_opencl_context() +end + +function init_opencl_context() + # retrieve global OpenCL info + device_, ctx_, queue_ = cl.create_compute_context() + global device = device_ + global ctx = ctx_ + global queue = queue_ end @platform aware array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) = array @@ -14,12 +23,10 @@ end imfilter_opencl(img, krn) end + function imfilter_opencl(img, krn) # retrieve basic info - T = ComplexF64 - - # retrieve global OpenCL info - device, ctx, queue = cl.create_compute_context() + T = ComplexF32 # build OpenCL program kernels conj_kernel = build_conj_kernel(ctx) @@ -37,6 +44,7 @@ function imfilter_opencl(img, krn) fftkrn = T.(padkrn) # OpenCl setup + plan = CLFFT.Plan(T, ctx, size(fftimg)) CLFFT.set_layout!(plan, :interleaved, :interleaved) CLFFT.set_result!(plan, :inplace) @@ -67,9 +75,9 @@ end function build_mult_kernel(ctx) mult_kernel = " - __kernel void mult(__global const double2 *a, - __global const double2 *b, - __global double2 *c) + __kernel void mult(__global const float2 *a, + __global const float2 *b, + __global float2 *c) { int gid = get_global_id(0); c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; @@ -82,7 +90,7 @@ end function build_conj_kernel(ctx) conj_kernel = " - __kernel void conj(__global double2 *a) + __kernel void conj(__global float2 *a) { int gid = get_global_id(0); a[gid].y = -a[gid].y; diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl index 691bb3f2..ea01f806 100644 --- a/src/test_imfilter.jl +++ b/src/test_imfilter.jl @@ -19,8 +19,8 @@ function test_imfilter(N) @info "start" - img = rand(Float64,(N,N,N)) - krn = rand(Float64,(10,10,10)) + img = rand(Float32,(N,N,N)) + krn = rand(Float32,(10,10,10)) X = Int(round(N/2)) @@ -30,12 +30,10 @@ function test_imfilter(N) @info("SECOND RUN - GPU - OpenCL") #global GPU = gpu_setup() + init_opencl_context() r2 = @btime imfilter_opencl($img, $krn) @info "check = ", r2[X,X,X] - CLFFT.api.clfftTeardown() - GC.gc() - @info "THIRD RUN - CPU - ImageFiltering" r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) @info "check = ", r3[X,X,X] From f1de845fddb1f391058b91c312bfaa022ef31b3c Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior <102302676+decarvalhojunior-fh@users.noreply.github.com> Date: Thu, 3 Nov 2022 21:49:34 -0400 Subject: [PATCH 10/20] OpenCL context outsilde the kernel function --- src/kernel/imfilter_opencl.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index f525a888..a06f7ba5 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -3,7 +3,16 @@ # ------------------------------------------------------------------ @platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}) - println("Running on OpenCL device") + @info "Running on OpenCL device" + init_opencl_context() +end + +function init_opencl_context() + # retrieve global OpenCL info + device_, ctx_, queue_ = cl.create_compute_context() + global device = device_ + global ctx = ctx_ + global queue = queue_ end @platform aware array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) = array @@ -18,9 +27,6 @@ function imfilter_opencl(img, krn) # retrieve basic info T = ComplexF32 - # retrieve global OpenCL info - device, ctx, queue = cl.create_compute_context() - # build OpenCL program kernels conj_kernel = build_conj_kernel(ctx) mult_kernel = build_mult_kernel(ctx) From 60604c4d1207c88117ad34335c3b363964d14049 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 19:16:33 -0500 Subject: [PATCH 11/20] updated Project.toml --- Project.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Project.toml b/Project.toml index 42f7bdc3..fabcb63b 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CpuId = "adafc99b-e345-5852-983c-f28acb93d879" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" GeoStatsBase = "323cb8eb-fbf6-51c0-afd0-f8fba70507b2" -GeoStatsImages = "7cd16168-b42c-5e7d-a585-4f59d326662d" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" GraphsFlows = "06909019-6f44-4949-96fc-b9d9aaa02889" ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5" @@ -33,12 +32,10 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CUDA = "3.11" CpuId = "0.2, 0.3" FFTW = "1.0" -GeoStatsBase = "0.26" Graphs = "1.4" GraphsFlows = "0.1" ImageFiltering = "0.6, 0.7" ImageMorphology = "0.2, 0.3, 0.4" -Meshes = "0.22" ProgressMeter = "1.1" RecipesBase = "1.0" StatsBase = "0.32, 0.33" From 2f7f35614f26481b6d2887c45339fe5fdd27d6fe Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 19:23:58 -0500 Subject: [PATCH 12/20] experiment scripts --- run_experiment_app.jl | 17 ++++++++++++++ run_experiment_kernel.jl | 17 ++++++++++++++ run_sample.jl | 48 ++++++++++++++++++++++++++++++++++++++++ src/test_imfilter.jl | 8 +++---- 4 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 run_experiment_app.jl create mode 100644 run_experiment_kernel.jl create mode 100644 run_sample.jl diff --git a/run_experiment_app.jl b/run_experiment_app.jl new file mode 100644 index 00000000..2733d0a5 --- /dev/null +++ b/run_experiment_app.jl @@ -0,0 +1,17 @@ +#!/bin/bash + + + +for size in 64 128 256 +do + for version in 2 + do + for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 + do + echo $size.$version.$turn.adhoc + $JULIA_PATH/julia ./run_sample.jl $version $size $turn >> output.iq.adhoc.$version.$size + echo $size.$version.$turn.structured + PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl -$version $size $turn >> output.iq.structured.$version.$size + done + done +done diff --git a/run_experiment_kernel.jl b/run_experiment_kernel.jl new file mode 100644 index 00000000..2733d0a5 --- /dev/null +++ b/run_experiment_kernel.jl @@ -0,0 +1,17 @@ +#!/bin/bash + + + +for size in 64 128 256 +do + for version in 2 + do + for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 + do + echo $size.$version.$turn.adhoc + $JULIA_PATH/julia ./run_sample.jl $version $size $turn >> output.iq.adhoc.$version.$size + echo $size.$version.$turn.structured + PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl -$version $size $turn >> output.iq.structured.$version.$size + done + done +done diff --git a/run_sample.jl b/run_sample.jl new file mode 100644 index 00000000..6dd95b18 --- /dev/null +++ b/run_sample.jl @@ -0,0 +1,48 @@ +import Pkg; Pkg.activate(".") +using ImageQuilting +using CUDA + +function main(args) + + @info args + + v = parse(Int64,args[1]) + size = parse(Int64,args[2]) + i = parse(Int64,args[3]) + + img = rand(Float32,(size,size,size)) + krn = rand(Float32,(10,10,10)) + + if (v < 0) + @info "$i: structured / $v" + img = ImageQuilting.array_kernel(img) + ImageQuilting.init_imfilter_kernel() + @time ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + @time ImageQuilting.imfilter_kernel(img,krn) + elseif (v == 1) + @info "$i: ad-hoc / default" + @time ImageQuilting.imfilter_cpu(img, krn) + @time ImageQuilting.imfilter_cpu(img, krn) + elseif (v == 2) + @info "$i: ad-hoc / CUDA" + img = CuArray(img) + @time ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + @time ImageQuilting.imfilter_cuda(img, krn) + elseif (v == 3) + @info "$i: ad-hoc / OpenCL" + ImageQuilting.init_opencl_context() + @time ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + @time ImageQuilting.imfilter_opencl(img, krn) + else + @info "wrong selection" + end +end + +main(ARGS) + diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl index ea01f806..b3253597 100644 --- a/src/test_imfilter.jl +++ b/src/test_imfilter.jl @@ -25,17 +25,17 @@ function test_imfilter(N) X = Int(round(N/2)) @info "FIRST RUN - GPU - CUDA" - r1 = @btime imfilter_cuda(CuArray($img), $krn) + r1 = @time imfilter_cuda(CuArray(img), krn) @info "check = ", r1[X,X,X] @info("SECOND RUN - GPU - OpenCL") #global GPU = gpu_setup() init_opencl_context() - r2 = @btime imfilter_opencl($img, $krn) + r2 = @time imfilter_opencl(img, krn) @info "check = ", r2[X,X,X] @info "THIRD RUN - CPU - ImageFiltering" - r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) + r3 = @time imfilter(img, centered(krn), Inner(), Algorithm.FFT()) @info "check = ", r3[X,X,X] @info "r1 & r2", allclose(r1, r2; rtol=1e-2, atol=1e-3) @@ -44,4 +44,4 @@ function test_imfilter(N) @info "end" -end \ No newline at end of file +end From 3c08a03b2d6d3e5e863248057a41efdc1fdad61e Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 19:25:28 -0500 Subject: [PATCH 13/20] experiment scripts --- run_sample.jl => run_sample_app.jl | 0 run_sample_kernel.jl | 48 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) rename run_sample.jl => run_sample_app.jl (100%) create mode 100644 run_sample_kernel.jl diff --git a/run_sample.jl b/run_sample_app.jl similarity index 100% rename from run_sample.jl rename to run_sample_app.jl diff --git a/run_sample_kernel.jl b/run_sample_kernel.jl new file mode 100644 index 00000000..6dd95b18 --- /dev/null +++ b/run_sample_kernel.jl @@ -0,0 +1,48 @@ +import Pkg; Pkg.activate(".") +using ImageQuilting +using CUDA + +function main(args) + + @info args + + v = parse(Int64,args[1]) + size = parse(Int64,args[2]) + i = parse(Int64,args[3]) + + img = rand(Float32,(size,size,size)) + krn = rand(Float32,(10,10,10)) + + if (v < 0) + @info "$i: structured / $v" + img = ImageQuilting.array_kernel(img) + ImageQuilting.init_imfilter_kernel() + @time ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + @time ImageQuilting.imfilter_kernel(img,krn) + elseif (v == 1) + @info "$i: ad-hoc / default" + @time ImageQuilting.imfilter_cpu(img, krn) + @time ImageQuilting.imfilter_cpu(img, krn) + elseif (v == 2) + @info "$i: ad-hoc / CUDA" + img = CuArray(img) + @time ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + @time ImageQuilting.imfilter_cuda(img, krn) + elseif (v == 3) + @info "$i: ad-hoc / OpenCL" + ImageQuilting.init_opencl_context() + @time ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + @time ImageQuilting.imfilter_opencl(img, krn) + else + @info "wrong selection" + end +end + +main(ARGS) + From 18ffbdc8f96f26179a586caa08916ccdf3d7359e Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 20:42:51 -0500 Subject: [PATCH 14/20] experiment scripts --- run_experiment_app.sh | 9 +++ run_experiment_kernel.jl | 17 ----- ...eriment_app.jl => run_experiment_kernel.sh | 0 run_sample_app.jl | 69 ++++++++----------- 4 files changed, 38 insertions(+), 57 deletions(-) create mode 100644 run_experiment_app.sh delete mode 100644 run_experiment_kernel.jl rename run_experiment_app.jl => run_experiment_kernel.sh (100%) diff --git a/run_experiment_app.sh b/run_experiment_app.sh new file mode 100644 index 00000000..8f178251 --- /dev/null +++ b/run_experiment_app.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 +do + echo $turn.adhoc + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia./run_sample.jl $version $size $turn >> output.iq.app.adhoc + echo $turn.structured + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured +done diff --git a/run_experiment_kernel.jl b/run_experiment_kernel.jl deleted file mode 100644 index 2733d0a5..00000000 --- a/run_experiment_kernel.jl +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - - - -for size in 64 128 256 -do - for version in 2 - do - for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 - do - echo $size.$version.$turn.adhoc - $JULIA_PATH/julia ./run_sample.jl $version $size $turn >> output.iq.adhoc.$version.$size - echo $size.$version.$turn.structured - PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl -$version $size $turn >> output.iq.structured.$version.$size - done - done -done diff --git a/run_experiment_app.jl b/run_experiment_kernel.sh similarity index 100% rename from run_experiment_app.jl rename to run_experiment_kernel.sh diff --git a/run_sample_app.jl b/run_sample_app.jl index 6dd95b18..644f7cee 100644 --- a/run_sample_app.jl +++ b/run_sample_app.jl @@ -1,47 +1,36 @@ -import Pkg; Pkg.activate(".") +path_iq = get(ENV,"PATH_IQ",".") +import Pkg; Pkg.activate(path_iq) +using GeoStats +using GeoStatsImages using ImageQuilting -using CUDA function main(args) - @info args - - v = parse(Int64,args[1]) - size = parse(Int64,args[2]) - i = parse(Int64,args[3]) - - img = rand(Float32,(size,size,size)) - krn = rand(Float32,(10,10,10)) - - if (v < 0) - @info "$i: structured / $v" - img = ImageQuilting.array_kernel(img) - ImageQuilting.init_imfilter_kernel() - @time ImageQuilting.imfilter_kernel(img,krn) - ImageQuilting.imfilter_kernel(img,krn) - ImageQuilting.imfilter_kernel(img,krn) - @time ImageQuilting.imfilter_kernel(img,krn) - elseif (v == 1) - @info "$i: ad-hoc / default" - @time ImageQuilting.imfilter_cpu(img, krn) - @time ImageQuilting.imfilter_cpu(img, krn) - elseif (v == 2) - @info "$i: ad-hoc / CUDA" - img = CuArray(img) - @time ImageQuilting.imfilter_cuda(img, krn) - ImageQuilting.imfilter_cuda(img, krn) - ImageQuilting.imfilter_cuda(img, krn) - @time ImageQuilting.imfilter_cuda(img, krn) - elseif (v == 3) - @info "$i: ad-hoc / OpenCL" - ImageQuilting.init_opencl_context() - @time ImageQuilting.imfilter_opencl(img, krn) - ImageQuilting.imfilter_opencl(img, krn) - ImageQuilting.imfilter_opencl(img, krn) - @time ImageQuilting.imfilter_opencl(img, krn) - else - @info "wrong selection" - end + @info args + + i = parse(Int64,args[1]) + + @info "large $i" + + # large + TIₗ = geostatsimage("Fluvsim") + iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + @time iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + + @info "medium $i" + + # medium + TIₘ = geostatsimage("StanfordV") + iqsim(asarray(TIₘ, :K), (30, 30, 30)) + @time iqsim(asarray(TIₘ, :K), (30, 30, 30)) + + @info "small $i" + + # small + TIₛ = geostatsimage("WalkerLake") + iqsim(asarray(TIₛ, :Z), (30, 30)) + @time iqsim(asarray(TIₛ, :Z), (30, 30)) + end main(ARGS) From 1dc2d288f0f97329c2faaa27a592ae04cb36d596 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 21:14:54 -0500 Subject: [PATCH 15/20] experiment scripts --- run_experiment_app.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/run_experiment_app.sh b/run_experiment_app.sh index 8f178251..3ea261be 100644 --- a/run_experiment_app.sh +++ b/run_experiment_app.sh @@ -3,7 +3,8 @@ for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 do echo $turn.adhoc - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia./run_sample.jl $version $size $turn >> output.iq.app.adhoc + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.adhoc echo $turn.structured - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured +# PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured done From 470cc03c794345dc41c05c94da9a0ac6bf92035e Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 23:15:23 -0500 Subject: [PATCH 16/20] experiment scripts --- run_experiment_app.sh | 7 ++++--- run_experiment_kernel.sh | 0 run_sample_app.jl | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 13 deletions(-) mode change 100644 => 100755 run_experiment_app.sh mode change 100644 => 100755 run_experiment_kernel.sh diff --git a/run_experiment_app.sh b/run_experiment_app.sh old mode 100644 new mode 100755 index 3ea261be..cc5ab35f --- a/run_experiment_app.sh +++ b/run_experiment_app.sh @@ -3,8 +3,9 @@ for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 do echo $turn.adhoc - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.adhoc + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc +# PATH_IQ="/home/heron/Dropbox/Copy/ufc_mdcc_hpc/JuliaEarth-work/PlatformAwareIQ-adhoc" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc echo $turn.structured -# PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" $JULIA_PATH/julia ./run_sample.jl $turn >> output.iq.app.structured + PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured +# PATH_IQ="/home/heron/Dropbox/Copy/ufc_mdcc_hpc/JuliaEarth-work/PlatformAwareIQ" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured done diff --git a/run_experiment_kernel.sh b/run_experiment_kernel.sh old mode 100644 new mode 100755 diff --git a/run_sample_app.jl b/run_sample_app.jl index 644f7cee..52ba33a4 100644 --- a/run_sample_app.jl +++ b/run_sample_app.jl @@ -10,12 +10,12 @@ function main(args) i = parse(Int64,args[1]) - @info "large $i" + @info "small $i" - # large - TIₗ = geostatsimage("Fluvsim") - iqsim(asarray(TIₗ, :facies), (30, 30, 30)) - @time iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + # small + TIₛ = geostatsimage("WalkerLake") + iqsim(asarray(TIₛ, :Z), (30, 30)) + @time iqsim(asarray(TIₛ, :Z), (30, 30)) @info "medium $i" @@ -24,12 +24,12 @@ function main(args) iqsim(asarray(TIₘ, :K), (30, 30, 30)) @time iqsim(asarray(TIₘ, :K), (30, 30, 30)) - @info "small $i" + @info "large $i" - # small - TIₛ = geostatsimage("WalkerLake") - iqsim(asarray(TIₛ, :Z), (30, 30)) - @time iqsim(asarray(TIₛ, :Z), (30, 30)) + # large + TIₗ = geostatsimage("Fluvsim") + iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + @time iqsim(asarray(TIₗ, :facies), (30, 30, 30)) end From 41edfb5a7f8d717392b01f71976ede317ab3887f Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Sun, 6 Nov 2022 23:18:58 -0500 Subject: [PATCH 17/20] experiment scripts --- src/kernel/imfilter_opencl.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl index 9077f022..3e6c644c 100644 --- a/src/kernel/imfilter_opencl.jl +++ b/src/kernel/imfilter_opencl.jl @@ -44,7 +44,6 @@ function imfilter_opencl(img, krn) fftkrn = T.(padkrn) # OpenCl setup - plan = CLFFT.Plan(T, ctx, size(fftimg)) CLFFT.set_layout!(plan, :interleaved, :interleaved) CLFFT.set_result!(plan, :inplace) From 0be2df4d247a27371dd93b44991afa3cc82336ef Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Mon, 7 Nov 2022 11:18:46 -0500 Subject: [PATCH 18/20] adhoc version --- run_experiment_app.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/run_experiment_app.sh b/run_experiment_app.sh index cc5ab35f..b359b77b 100755 --- a/run_experiment_app.sh +++ b/run_experiment_app.sh @@ -3,9 +3,9 @@ for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 do echo $turn.adhoc - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-adhoc" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc -# PATH_IQ="/home/heron/Dropbox/Copy/ufc_mdcc_hpc/JuliaEarth-work/PlatformAwareIQ-adhoc" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc + git switch iq-sim-refact + PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc echo $turn.structured - PATH_IQ="/home/tcarneiropessoa/heron/ImageQuilting-structured" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured -# PATH_IQ="/home/heron/Dropbox/Copy/ufc_mdcc_hpc/JuliaEarth-work/PlatformAwareIQ" $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured + git switch iq-sim-refact-adhoc + PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured done From 9fb35664f54e89fea47602d64eedd698d7b2a2b5 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Mon, 7 Nov 2022 11:25:29 -0500 Subject: [PATCH 19/20] ... --- run_experiment_app.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run_experiment_app.sh b/run_experiment_app.sh index b359b77b..65544e55 100755 --- a/run_experiment_app.sh +++ b/run_experiment_app.sh @@ -3,9 +3,9 @@ for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 do echo $turn.adhoc - git switch iq-sim-refact + git switch iq-sim-refact-adhoc PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc echo $turn.structured - git switch iq-sim-refact-adhoc + git switch iq-sim-refact PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured done From 428d47c6a8a98a851d04ab7c451620a287e8f285 Mon Sep 17 00:00:00 2001 From: Francisco Heron de Carvalho Junior Date: Mon, 7 Nov 2022 15:46:22 -0500 Subject: [PATCH 20/20] CUDA kernel adjust --- src/kernel/imfilter_cuda.jl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/kernel/imfilter_cuda.jl b/src/kernel/imfilter_cuda.jl index 910f6193..40514f33 100644 --- a/src/kernel/imfilter_cuda.jl +++ b/src/kernel/imfilter_cuda.jl @@ -10,30 +10,35 @@ end @platform aware array_kernel({accelerator_count::(@atleast 1), accelerator_manufacturer::NVIDIA, - accelerator_api::(@api CUDA)}, array) = CuArray(array) + accelerator_api::(@api CUDA)}, array) =CuArray{Float32}(array) @platform aware view_kernel({accelerator_count::(@atleast 1), accelerator_manufacturer::NVIDIA, accelerator_api::(@api CUDA)}, array, I) = Array(array[I]) +counter = Ref{Int}(0) + + @platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_manufacturer::NVIDIA, accelerator_api::(@api CUDA)}, img, krn) + + counter[] = counter[] + 1 imfilter_cuda(img,krn) end function imfilter_cuda(img, krn) - + # pad kernel to common size with image - padkrn = CUDA.zeros(size(img)) - copyto!(padkrn, CartesianIndices(krn), CuArray(krn), CartesianIndices(krn)) + padkrn = CUDA.zeros(Float32, size(img)) + copyto!(padkrn, CartesianIndices(krn), CuArray{Float32}(krn), CartesianIndices(krn)) # perform ifft(fft(img) .* conj.(fft(krn))) fftimg = img |> CUFFT.fft - fftkrn = padkrn |> CuArray |> CUFFT.fft + fftkrn = padkrn |> CUFFT.fft result = (fftimg .* conj.(fftkrn)) |> CUFFT.ifft # recover result finalsize = size(img) .- (size(krn) .- 1) real.(result[CartesianIndices(finalsize)]) |> Array -end + end