diff --git a/Project.toml b/Project.toml index 42f7bdc3..fabcb63b 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CpuId = "adafc99b-e345-5852-983c-f28acb93d879" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" GeoStatsBase = "323cb8eb-fbf6-51c0-afd0-f8fba70507b2" -GeoStatsImages = "7cd16168-b42c-5e7d-a585-4f59d326662d" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" GraphsFlows = "06909019-6f44-4949-96fc-b9d9aaa02889" ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5" @@ -33,12 +32,10 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CUDA = "3.11" CpuId = "0.2, 0.3" FFTW = "1.0" -GeoStatsBase = "0.26" Graphs = "1.4" GraphsFlows = "0.1" ImageFiltering = "0.6, 0.7" ImageMorphology = "0.2, 0.3, 0.4" -Meshes = "0.22" ProgressMeter = "1.1" RecipesBase = "1.0" StatsBase = "0.32, 0.33" diff --git a/run_experiment_app.sh b/run_experiment_app.sh new file mode 100755 index 00000000..65544e55 --- /dev/null +++ b/run_experiment_app.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 +do + echo $turn.adhoc + git switch iq-sim-refact-adhoc + PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.adhoc + echo $turn.structured + git switch iq-sim-refact + PATH_IQ="." $JULIA_PATH/julia ./run_sample_app.jl $turn >> output.iq.app.structured +done diff --git a/run_experiment_kernel.sh b/run_experiment_kernel.sh new file mode 100755 index 00000000..2733d0a5 --- /dev/null +++ b/run_experiment_kernel.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + + +for size in 64 128 256 +do + for version in 2 + do + for turn in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 + do + echo $size.$version.$turn.adhoc + $JULIA_PATH/julia ./run_sample.jl $version $size $turn >> output.iq.adhoc.$version.$size + echo $size.$version.$turn.structured + PLATFORM_DESCRIPTION=Platform.$version.toml $JULIA_PATH/julia ./run_sample.jl -$version $size $turn >> output.iq.structured.$version.$size + done + done +done diff --git a/run_sample_app.jl b/run_sample_app.jl new file mode 100644 index 00000000..52ba33a4 --- /dev/null +++ b/run_sample_app.jl @@ -0,0 +1,37 @@ +path_iq = get(ENV,"PATH_IQ",".") +import Pkg; Pkg.activate(path_iq) +using GeoStats +using GeoStatsImages +using ImageQuilting + +function main(args) + + @info args + + i = parse(Int64,args[1]) + + @info "small $i" + + # small + TIₛ = geostatsimage("WalkerLake") + iqsim(asarray(TIₛ, :Z), (30, 30)) + @time iqsim(asarray(TIₛ, :Z), (30, 30)) + + @info "medium $i" + + # medium + TIₘ = geostatsimage("StanfordV") + iqsim(asarray(TIₘ, :K), (30, 30, 30)) + @time iqsim(asarray(TIₘ, :K), (30, 30, 30)) + + @info "large $i" + + # large + TIₗ = geostatsimage("Fluvsim") + iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + @time iqsim(asarray(TIₗ, :facies), (30, 30, 30)) + +end + +main(ARGS) + diff --git a/run_sample_kernel.jl b/run_sample_kernel.jl new file mode 100644 index 00000000..6dd95b18 --- /dev/null +++ b/run_sample_kernel.jl @@ -0,0 +1,48 @@ +import Pkg; Pkg.activate(".") +using ImageQuilting +using CUDA + +function main(args) + + @info args + + v = parse(Int64,args[1]) + size = parse(Int64,args[2]) + i = parse(Int64,args[3]) + + img = rand(Float32,(size,size,size)) + krn = rand(Float32,(10,10,10)) + + if (v < 0) + @info "$i: structured / $v" + img = ImageQuilting.array_kernel(img) + ImageQuilting.init_imfilter_kernel() + @time ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + ImageQuilting.imfilter_kernel(img,krn) + @time ImageQuilting.imfilter_kernel(img,krn) + elseif (v == 1) + @info "$i: ad-hoc / default" + @time ImageQuilting.imfilter_cpu(img, krn) + @time ImageQuilting.imfilter_cpu(img, krn) + elseif (v == 2) + @info "$i: ad-hoc / CUDA" + img = CuArray(img) + @time ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + ImageQuilting.imfilter_cuda(img, krn) + @time ImageQuilting.imfilter_cuda(img, krn) + elseif (v == 3) + @info "$i: ad-hoc / OpenCL" + ImageQuilting.init_opencl_context() + @time ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + ImageQuilting.imfilter_opencl(img, krn) + @time ImageQuilting.imfilter_opencl(img, krn) + else + @info "wrong selection" + end +end + +main(ARGS) + diff --git a/src/ImageQuilting.jl b/src/ImageQuilting.jl index 4afbcece..f26b79b1 100644 --- a/src/ImageQuilting.jl +++ b/src/ImageQuilting.jl @@ -17,23 +17,20 @@ using ProgressMeter: Progress, next! using FFTW: set_num_threads using CpuId: cpucores using RecipesBase -using Primes + using CUDA using OpenCL using CLFFT -const clfft = CLFFT +using PlatformAware using Base: @nexprs, @nloops, @nref using SparseArrays: spzeros using Statistics: mean, std using Random -using PlatformAware - import GeoStatsBase: preprocess, solvesingle include("utils.jl") -include("utils_gpu.jl") include("plotrecipes.jl") include("relaxation.jl") include("taumodel.jl") @@ -42,15 +39,15 @@ include("iqsim.jl") include("voxelreuse.jl") include("geostats.jl") -include("test_imfilter.jl") - -function __init__() - include(pkgdir(@__MODULE__) * "/src/kernels.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.default.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.cuda.jl") - include(pkgdir(@__MODULE__) * "/src/imfilter.opencl.jl") +function __init__() + include("src/kernels.jl") + include("src/kernel/imfilter_default.jl") + include("src/kernel/imfilter_cuda.jl") + include("src/kernel/imfilter_opencl.jl") end +include("test_imfilter.jl") + export # functions iqsim, diff --git a/src/imfilter.cuda.jl b/src/imfilter.cuda.jl deleted file mode 100644 index d4cc852a..00000000 --- a/src/imfilter.cuda.jl +++ /dev/null @@ -1,34 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}) - println("Running on CUDA GPU") -end - -@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, array) CuArray(array) end - -@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, array, I) Array(array[I]) end - -@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::CUDA_API}, img, krn) - imfilter_cuda(img,krn) - end - - -function imfilter_cuda(img, krn) - - # pad kernel to common size with image - padkrn = CUDA.zeros(size(img)) - copyto!(padkrn, CartesianIndices(krn), CuArray(krn), CartesianIndices(krn)) - - # perform ifft(fft(img) .* conj.(fft(krn))) - fftimg = img |> CUFFT.fft - fftkrn = padkrn |> CuArray |> CUFFT.fft - result = (fftimg .* conj.(fftkrn)) |> CUFFT.ifft - - # recover result - finalsize = size(img) .- (size(krn) .- 1) - real.(result[CartesianIndices(finalsize)]) |> Array - -end - diff --git a/src/imfilter.opencl.jl b/src/imfilter.opencl.jl deleted file mode 100644 index c9c1f005..00000000 --- a/src/imfilter.opencl.jl +++ /dev/null @@ -1,111 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}) - println("Running on OpenCL GPU") - global GPU = gpu_setup() -end - -@platform aware function array_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, array) array end - -@platform aware function view_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, array, I) view(array, I) end - -@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::OpenCL_API}, img, kern) - imfilter_opencl(img,kern) -end - - -function imfilter_opencl(img, kern) - # retrieve basic info - N = ndims(img) - T = ComplexF64 - - # GPU metadata - ctx = GPU.ctx; queue = GPU.queue - mult_kernel = GPU.mult_kernel - - # operations with complex type - img = T.(img) - kern = T.(kern) - - # kernel may require padding - prepad = ntuple(d->(size(kern,d)-1) ÷ 2, N) - postpad = ntuple(d->(size(kern,d) ) ÷ 2, N) - - # OpenCL FFT expects powers of 2, 3, 5, 7, 11 or 13 - clpad = clfftpad(img) - A = padarray(img, Pad(:symmetric, zeros(Int, ndims(img)), clpad)) - A = parent(A) - - krn = zeros(T, size(A)) - indexesK = ntuple(d->[size(A,d)-prepad[d]+1:size(A,d);1:size(kern,d)-prepad[d]], N) - krn[indexesK...] = reflect(kern) - - # plan FFT - p = clfft.Plan(T, ctx, size(A)) - clfft.set_layout!(p, :interleaved, :interleaved) - clfft.set_result!(p, :inplace) - clfft.bake!(p, queue) - - # populate GPU memory - bufA = cl.Buffer(T, ctx, :copy, hostbuf=A) - bufkrn = cl.Buffer(T, ctx, :copy, hostbuf=krn) - bufRES = cl.Buffer(T, ctx, length(A)) - - # compute ifft(fft(A).*fft(kern)) - clfft.enqueue_transform(p, :forward, [queue], bufA, nothing) - clfft.enqueue_transform(p, :forward, [queue], bufkrn, nothing) - queue(mult_kernel, length(A), nothing, bufA, bufkrn, bufRES) - - clfft.enqueue_transform(p, :backward, [queue], bufRES, nothing) - - # get result back - AF = reshape(cl.read(queue, bufRES), size(A)) - - # undo OpenCL FFT paddings - AF = view(AF, ntuple(d->1:size(AF,d)-clpad[d], N)...) - - out = Array{realtype(eltype(AF))}(undef, ntuple(d->size(img,d) - prepad[d] - postpad[d], N)...) - indexesA = ntuple(d->postpad[d]+1:size(img,d)-prepad[d], N) - copyreal!(out, AF, indexesA) - - out - - end - - @generated function reflect(A::AbstractArray{T,N}) where {T,N} - quote - B = Array{T,N}(undef,size(A)) - @nexprs $N d->(n_d = size(A, d)+1) - @nloops $N i A d->(j_d = n_d - i_d) begin - @nref($N, B, j) = @nref($N, A, i) - end - B - end - end - - -for N = 1:5 - @eval begin - function copyreal!(dst::Array{T,$N}, src, I::Tuple{Vararg{UnitRange{Int}}}) where {T<:Real} - @nexprs $N d->(I_d = I[d]) - @nloops $N i dst d->(j_d = first(I_d)+i_d-1) begin - (@nref $N dst i) = real(@nref $N src j) - end - dst - end - function copyreal!(dst::Array{T,$N}, src, I::Tuple{Vararg{UnitRange{Int}}}) where {T<:Complex} - @nexprs $N d->I_d = I[d] - @nloops $N i dst d->(j_d = first(I_d)+i_d-1) begin - (@nref $N dst i) = @nref $N src j - end - dst - end - end -end - -realtype(::Type{R}) where {R<:Real} = R -realtype(::Type{Complex{R}}) where {R<:Real} = R - - diff --git a/src/kernel/imfilter_cuda.jl b/src/kernel/imfilter_cuda.jl new file mode 100644 index 00000000..40514f33 --- /dev/null +++ b/src/kernel/imfilter_cuda.jl @@ -0,0 +1,44 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENCE in the project root. +# ------------------------------------------------------------------ + +@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}) + @info "Running on NVIDIA/CUDA GPU" +end + +@platform aware array_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, array) =CuArray{Float32}(array) + +@platform aware view_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, array, I) = Array(array[I]) + +counter = Ref{Int}(0) + + +@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), + accelerator_manufacturer::NVIDIA, + accelerator_api::(@api CUDA)}, img, krn) + + counter[] = counter[] + 1 + imfilter_cuda(img,krn) +end + +function imfilter_cuda(img, krn) + + # pad kernel to common size with image + padkrn = CUDA.zeros(Float32, size(img)) + copyto!(padkrn, CartesianIndices(krn), CuArray{Float32}(krn), CartesianIndices(krn)) + + # perform ifft(fft(img) .* conj.(fft(krn))) + fftimg = img |> CUFFT.fft + fftkrn = padkrn |> CUFFT.fft + result = (fftimg .* conj.(fftkrn)) |> CUFFT.ifft + + # recover result + finalsize = size(img) .- (size(krn) .- 1) + real.(result[CartesianIndices(finalsize)]) |> Array + end diff --git a/src/imfilter.default.jl b/src/kernel/imfilter_default.jl similarity index 65% rename from src/imfilter.default.jl rename to src/kernel/imfilter_default.jl index ea125534..bc67755d 100644 --- a/src/imfilter.default.jl +++ b/src/kernel/imfilter_default.jl @@ -3,15 +3,17 @@ # ------------------------------------------------------------------ @platform default function init_imfilter_kernel() - println("Running on DEFAULT PLATFORM") + @info "Running on DEFAULT PLATFORM" end -@platform default function array_kernel(array) array end +@platform default array_kernel(array) = array -@platform default function view_kernel(array, I) view(array, I) end +@platform default view_kernel(array, I) = view(array, I) @platform default function imfilter_kernel(img, krn) - imfilter(img, centered(krn), Inner(), Algorithm.FFT()) + imfilter_cpu(img, krn) end - +function imfilter_cpu(img, krn) + imfilter(img, centered(krn), Inner(), Algorithm.FFT()) +end diff --git a/src/kernel/imfilter_opencl.jl b/src/kernel/imfilter_opencl.jl new file mode 100644 index 00000000..3e6c644c --- /dev/null +++ b/src/kernel/imfilter_opencl.jl @@ -0,0 +1,110 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENCE in the project root. +# ------------------------------------------------------------------ + +@platform aware function init_imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}) + @info "Running on OpenCL device" + init_opencl_context() +end + +function init_opencl_context() + # retrieve global OpenCL info + device_, ctx_, queue_ = cl.create_compute_context() + global device = device_ + global ctx = ctx_ + global queue = queue_ +end + +@platform aware array_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array) = array + +@platform aware view_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, array, I) = view(array, I) + +@platform aware function imfilter_kernel({accelerator_count::(@atleast 1), accelerator_api::(@api OpenCL)}, img, krn) + imfilter_opencl(img, krn) +end + + +function imfilter_opencl(img, krn) + # retrieve basic info + T = ComplexF32 + + # build OpenCL program kernels + conj_kernel = build_conj_kernel(ctx) + mult_kernel = build_mult_kernel(ctx) + + # pad img to support CLFFT operations + padimg = pad_opencl_img(img) + + # pad krn to common size with padimg + padkrn = zeros(eltype(img), size(padimg)) + padkrn[CartesianIndices(krn)] = krn + + # convert to Complex + fftimg = T.(padimg) + fftkrn = T.(padkrn) + + # OpenCl setup + plan = CLFFT.Plan(T, ctx, size(fftimg)) + CLFFT.set_layout!(plan, :interleaved, :interleaved) + CLFFT.set_result!(plan, :inplace) + CLFFT.bake!(plan, queue) + + # populate device memory + bufimg = cl.Buffer(T, ctx, :copy, hostbuf=fftimg) + bufkrn = cl.Buffer(T, ctx, :copy, hostbuf=fftkrn) + bufres = cl.Buffer(T, ctx, :w, length(fftimg)) + + # transform img and krn to FFT representation + CLFFT.enqueue_transform(plan, :forward, [queue], bufimg, nothing) + CLFFT.enqueue_transform(plan, :forward, [queue], bufkrn, nothing) + + # compute ifft(fft(A).*conj.(fft(krn))) + queue(conj_kernel, length(fftimg), nothing, bufkrn) + queue(mult_kernel, length(fftimg), nothing, bufimg, bufkrn, bufres) + CLFFT.enqueue_transform(plan, :backward, [queue], bufres, nothing) + + # recover result + result = reshape(cl.read(queue, bufres), size(fftimg)) + real_result = real.(result) + + finalsize = size(img) .- (size(krn) .- 1) + + real_result[CartesianIndices(finalsize)] +end + +function build_mult_kernel(ctx) + mult_kernel = " + __kernel void mult(__global const float2 *a, + __global const float2 *b, + __global float2 *c) + { + int gid = get_global_id(0); + c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; + c[gid].y = a[gid].x*b[gid].y + a[gid].y*b[gid].x; + } + " + prog = cl.Program(ctx, source=mult_kernel) |> cl.build! + cl.Kernel(prog, "mult") +end + +function build_conj_kernel(ctx) + conj_kernel = " + __kernel void conj(__global float2 *a) + { + int gid = get_global_id(0); + a[gid].y = -a[gid].y; + } + " + prog = cl.Program(ctx, source=conj_kernel) |> cl.build! + cl.Kernel(prog, "conj") +end + +function pad_opencl_img(img) + # OpenCL FFT expects products of powers of 2, 3, 5, 7, 11 or 13 + radices = CLFFT.supported_radices() + newsize = map(dim -> nextprod(radices, dim), size(img)) + + padimg = zeros(eltype(img), newsize) + padimg[CartesianIndices(img)] = img + padimg +end diff --git a/src/kernels.jl b/src/kernels.jl index e51bbe79..a47debe8 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -4,3 +4,4 @@ @platform parameter clear @platform parameter accelerator_count @platform parameter accelerator_api +@platform parameter accelerator_manufacturer diff --git a/src/test_imfilter.jl b/src/test_imfilter.jl index 5f46d24c..b3253597 100644 --- a/src/test_imfilter.jl +++ b/src/test_imfilter.jl @@ -19,22 +19,23 @@ function test_imfilter(N) @info "start" - img = rand(Float64,(N,N,N)) - krn = rand(Float64,(10,10,10)) + img = rand(Float32,(N,N,N)) + krn = rand(Float32,(10,10,10)) X = Int(round(N/2)) @info "FIRST RUN - GPU - CUDA" - r1 = @btime imfilter_cuda(CuArray($img), $krn) + r1 = @time imfilter_cuda(CuArray(img), krn) @info "check = ", r1[X,X,X] @info("SECOND RUN - GPU - OpenCL") - global GPU = gpu_setup() - r2 = @btime imfilter_opencl($img, $krn) + #global GPU = gpu_setup() + init_opencl_context() + r2 = @time imfilter_opencl(img, krn) @info "check = ", r2[X,X,X] @info "THIRD RUN - CPU - ImageFiltering" - r3 = @btime imfilter($img, centered($krn), Inner(), Algorithm.FFT()) + r3 = @time imfilter(img, centered(krn), Inner(), Algorithm.FFT()) @info "check = ", r3[X,X,X] @info "r1 & r2", allclose(r1, r2; rtol=1e-2, atol=1e-3) @@ -43,4 +44,4 @@ function test_imfilter(N) @info "end" -end \ No newline at end of file +end diff --git a/src/utils_gpu.jl b/src/utils_gpu.jl deleted file mode 100644 index 3086feca..00000000 --- a/src/utils_gpu.jl +++ /dev/null @@ -1,93 +0,0 @@ -# ------------------------------------------------------------------ -# Licensed under the MIT License. See LICENCE in the project root. -# ------------------------------------------------------------------ - -struct GPUmeta - dev - ctx - queue - mult_kernel -end - -function gpu_setup() - @assert cl ≠ nothing "OpenCL.jl not installed, cannot use GPU" - @assert clfft ≠ nothing "CLFFT.jl not installed, cannot use GPU" - - devs = cl.devices(:gpu) - - if isempty(devs) - @warn "GPU not found, falling back to other OpenCL devices" - devs = cl.devices() - end - @assert !isempty(devs) "OpenCL device not found, make sure drivers are installed" - - dev = [] - devnames = map(d -> d[:platform][:name], devs) - for vendor in ["NVIDIA","AMD","Intel"], (idx,name) in enumerate(devnames) - if occursin(vendor, name) - dev = devs[idx] - break - end - end - - devtype = uppercase(string(dev[:device_type])) - devname = dev[:name] - - @info "using $devtype $devname" - - ctx = cl.Context(dev) - queue = cl.CmdQueue(ctx) - mult_kernel = basic_kernels(ctx) - - GPUmeta(dev, ctx, queue, mult_kernel) -end - -function basic_kernels(ctx) - mult_kernel = " - __kernel void mult(__global const double2 *a, - __global const double2 *b, - __global double2 *c) - { - int gid = get_global_id(0); - c[gid].x = a[gid].x*b[gid].x - a[gid].y*b[gid].y; - c[gid].y = a[gid].x*b[gid].y + a[gid].y*b[gid].x; - } - " - - # build OpenCL program - prog = cl.Program(ctx, source=mult_kernel) |> cl.build! - - cl.Kernel(prog, "mult") -end - - - -function clfftpad(A::AbstractArray) - # clFFT releases support powers of 2, 3, 5, ... - radices = [2,3,5] - v = clfft.version() - v ≥ v"2.8.0" && push!(radices, 7) - v ≥ v"2.12.0" && push!(radices, 11, 13) - - result = Int[] - for s in size(A) - fs = keys(factor(s)) - if fs ⊆ radices - push!(result, 0) - else - # Try a closer number that has prime factors of 2 and 3. - # Use the next power of 2 (say N) to get multiple new - # candidates. - N = nextpow(2,s) - - # fractions of N: 100%, 93%, 84%, 75%, 56% - candidates = [N, 15(N÷16), 27(N÷32), 3(N÷4), 9(N÷16)] - candidates = candidates[candidates .> s] - n = minimum(candidates) - - push!(result, n - s) - end - end - - result -end