|
28 | 28 | # possible to get a value of 312, then we will have 2 separate shmem blocks,
|
29 | 29 | # one from 1->256, and another from 256->512
|
30 | 30 | @uniform max_element = 1
|
| 31 | + # @print("tid=$tid, lid=$lid, gs=$gs, N=$N, max_element=$max_element\n") |
| 32 | + |
31 | 33 | for min_element in 1:gs:N
|
32 | 34 |
|
33 | 35 | # Setting shared_histogram to 0
|
|
41 | 43 |
|
42 | 44 | # Defining bin on shared memory and writing to it if possible
|
43 | 45 | bin = input[tid]
|
| 46 | + win = bin |
| 47 | + # if lid == 5 |
| 48 | + # end |
44 | 49 | if bin >= min_element && bin < max_element
|
45 | 50 | bin -= min_element - 1
|
| 51 | + @print("tid=$tid, lid=$lid, bin=$win, gs=$gs, N=$N, max_element=$max_element, min_element=$min_element, bin=$(bin)\n") |
46 | 52 | @atomic shared_histogram[bin] += 1
|
47 | 53 | end
|
48 | 54 |
|
|
53 | 59 | end
|
54 | 60 |
|
55 | 61 | end
|
| 62 | + # @print("tid=$tid, lid=$lid, gs=$gs, N=$N, max_element=$max_element\n") |
56 | 63 |
|
57 | 64 | end
|
58 | 65 |
|
59 |
| -function histogram!(histogram_output, input) |
| 66 | +function histogram!(histogram_output, input, groupsize=256) |
60 | 67 | backend = get_backend(histogram_output)
|
61 | 68 | # Need static block size
|
62 |
| - kernel! = histogram_kernel!(backend, (256,)) |
| 69 | + kernel! = histogram_kernel!(backend, (groupsize,)) |
| 70 | + @show kernel! |
| 71 | + @show size(input) |
63 | 72 | kernel!(histogram_output, input, ndrange = size(input))
|
64 | 73 | return
|
65 | 74 | end
|
|
73 | 82 |
|
74 | 83 | @testset "histogram tests" begin
|
75 | 84 | # Use Int32 as some backends don't support 64-bit atomics
|
76 |
| - rand_input = Int32.(rand(1:128, 1000)) |
77 |
| - linear_input = Int32.(rand(1:128, 1024)) |
78 |
| - all_two = fill(Int32(2), 512) |
| 85 | + # rand_input = Int32.(rand(1:128, 1000)) |
| 86 | + rand_input = Int32.(rand(1:20, 20)) |
| 87 | + # linear_input = Int32.(rand(1:128, 1024)) |
| 88 | + # all_two = fill(Int32(2), 512) |
79 | 89 |
|
80 | 90 | histogram_rand_baseline = create_histogram(rand_input)
|
81 |
| - histogram_linear_baseline = create_histogram(linear_input) |
82 |
| - histogram_two_baseline = create_histogram(all_two) |
| 91 | + # histogram_linear_baseline = create_histogram(linear_input) |
| 92 | + # histogram_two_baseline = create_histogram(all_two) |
83 | 93 |
|
84 | 94 | rand_input = move(backend, rand_input)
|
85 |
| - linear_input = move(backend, linear_input) |
86 |
| - all_two = move(backend, all_two) |
| 95 | + # linear_input = move(backend, linear_input) |
| 96 | + # all_two = move(backend, all_two) |
87 | 97 |
|
88 | 98 | rand_histogram = KernelAbstractions.zeros(backend, eltype(rand_input), maximum(rand_input))
|
89 |
| - linear_histogram = KernelAbstractions.zeros(backend, eltype(linear_input), maximum(linear_input)) |
90 |
| - two_histogram = KernelAbstractions.zeros(backend, eltype(all_two), maximum(all_two)) |
| 99 | + # linear_histogram = KernelAbstractions.zeros(backend, eltype(linear_input), maximum(linear_input)) |
| 100 | + # two_histogram = KernelAbstractions.zeros(backend, eltype(all_two), maximum(all_two)) |
91 | 101 |
|
92 |
| - histogram!(rand_histogram, rand_input) |
93 |
| - histogram!(linear_histogram, linear_input) |
94 |
| - histogram!(two_histogram, all_two) |
| 102 | + histogram!(rand_histogram, rand_input, 9) |
| 103 | + # histogram!(linear_histogram, linear_input) |
| 104 | + # histogram!(two_histogram, all_two) |
95 | 105 | KernelAbstractions.synchronize(backend)
|
96 | 106 |
|
| 107 | + @show sum(Array(rand_histogram)) |
| 108 | + @show sum(histogram_rand_baseline) |
| 109 | + @show findall(Array(rand_histogram) .!= histogram_rand_baseline) |
| 110 | + |
97 | 111 | @test isapprox(Array(rand_histogram), histogram_rand_baseline)
|
98 |
| - @test isapprox(Array(linear_histogram), histogram_linear_baseline) |
99 |
| - @test isapprox(Array(two_histogram), histogram_two_baseline) |
| 112 | + # @test isapprox(Array(linear_histogram), histogram_linear_baseline) |
| 113 | + # @test isapprox(Array(two_histogram), histogram_two_baseline) |
100 | 114 | end
|
0 commit comments