Skip to content

Commit e0614a4

Browse files
committed
vastly improved voronoi 3D performance (7x faster) by unrolling part of the loop
1 parent 57436df commit e0614a4

File tree

1 file changed

+83
-5
lines changed

1 file changed

+83
-5
lines changed

sources/noise/cell.swift

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,91 @@ struct CellNoise3D:Noise
253253

254254
var r2:Double = self.distance(from: sample, generating_point: near)
255255

256+
// Nested helper that considers one candidate generating point.
// The candidate is evaluated only when dx*dx + dy*dy + dz*dz is below the
// current `r2`; in that case `r2` (captured from the enclosing scope) is
// lowered to the smaller of its current value and the value returned by
// `self.distance(from:generating_point:)` for that candidate.
// Offsets that are not passed default to 0 and so add nothing to the sum.
// NOTE(review): `r2` is presumably a squared distance, and dx/dy/dz presumably
// the per-axis gaps between the sample and the candidate's cell — confirm
// against the enclosing function and `distance(from:generating_point:)`.
@inline(__always)
257+
func test(generating_point:IntV3, dx:Double = 0, dy:Double = 0, dz:Double = 0)
258+
{
259+
// Cheap rejection: skip the distance evaluation when the offset sum already
// meets or exceeds the best value found so far.
if dx*dx + dy*dy + dz*dz < r2
260+
{
261+
r2 = min(r2, self.distance(from: sample, generating_point: generating_point))
262+
}
263+
}
264+
265+
// (0.0 , [(-1, 0, 0), (0, -1, 0), (0, 0, -1), (0, -1, -1), (-1, 0, -1), (-1, -1, 0), (-1, -1, -1)])
266+
let far:IntV3 = (near.a - quadrant.a, near.b - quadrant.b, near.c - quadrant.c)
267+
test(generating_point: (far.a, near.b, near.c), dx: nearpoint_disp.x - 0.5)
268+
test(generating_point: (near.a, far.b, near.c), dy: nearpoint_disp.y - 0.5)
269+
test(generating_point: (near.a, near.b, far.c), dz: nearpoint_disp.z - 0.5)
270+
271+
test(generating_point: (near.a, far.b, far.c), dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z - 0.5)
272+
test(generating_point: (far.a, near.b, far.c), dx: nearpoint_disp.x - 0.5, dz: nearpoint_disp.z - 0.5)
273+
test(generating_point: (far.a, far.b, near.c), dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y - 0.5)
274+
275+
test(generating_point: far, dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z - 0.5)
276+
277+
// Testing shows that about 47.85% of samples are eliminated by this point
278+
// (0.25, [(1, 0, 0), ( 0, 1, 0), ( 0, 0, 1),
279+
// (0, -1, 1), ( 0, 1, -1), ( 1, 0, -1), (-1, 0, 1), (-1, 1, 0), (1, -1, 0),
280+
// (1, -1, -1), (-1, 1, -1), (-1, -1, 1)])
281+
guard r2 > 0.25
282+
else
283+
{
284+
return self.amplitude * r2
285+
}
286+
287+
let inner:IntV3 = (near.a + quadrant.a, near.b + quadrant.b, near.c + quadrant.c)
288+
test(generating_point: (inner.a, near.b, near.c), dx: nearpoint_disp.x + 0.5)
289+
test(generating_point: (near.a, inner.b, near.c), dy: nearpoint_disp.y + 0.5)
290+
test(generating_point: (near.a, near.b, inner.c), dz: nearpoint_disp.z + 0.5)
291+
292+
test(generating_point: (near.a, far.b, inner.c), dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z + 0.5)
293+
test(generating_point: (near.a, inner.b, far.c), dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z - 0.5)
294+
test(generating_point: (inner.a, near.b, far.c), dx: nearpoint_disp.x + 0.5, dz: nearpoint_disp.z - 0.5)
295+
test(generating_point: (far.a, near.b, inner.c), dx: nearpoint_disp.x - 0.5, dz: nearpoint_disp.z + 0.5)
296+
test(generating_point: (far.a, inner.b, near.c), dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y + 0.5)
297+
test(generating_point: (inner.a, far.b, near.c), dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y - 0.5)
298+
299+
test(generating_point: (inner.a, far.b, far.c), dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z - 0.5)
300+
test(generating_point: (far.a, inner.b, far.c), dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z - 0.5)
301+
test(generating_point: (far.a, far.b, inner.c), dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z + 0.5)
302+
303+
// Testing shows that about 88.60% of samples are eliminated by this point
304+
// (0.5 , [(0, 1, 1), (1, 0, 1), (1, 1, 0), (-1, 1, 1), (1, -1, 1), (1, 1, -1)])
305+
guard r2 > 0.5
306+
else
307+
{
308+
return self.amplitude * r2
309+
}
310+
311+
test(generating_point: (near.a, inner.b, inner.c), dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z + 0.5)
312+
test(generating_point: (inner.a, near.b, inner.c), dx: nearpoint_disp.x + 0.5, dz: nearpoint_disp.z + 0.5)
313+
test(generating_point: (inner.a, inner.b, near.c), dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y + 0.5)
314+
315+
test(generating_point: (far.a, inner.b, inner.c), dx: nearpoint_disp.x - 0.5, dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z + 0.5)
316+
test(generating_point: (inner.a, far.b, inner.c), dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y - 0.5, dz: nearpoint_disp.z + 0.5)
317+
test(generating_point: (inner.a, inner.b, far.c), dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z - 0.5)
318+
319+
// Testing shows that about 98.26% of samples are eliminated by this point
320+
// (0.75, [(1, 1, 1)])
321+
guard r2 > 0.75
322+
else
323+
{
324+
return self.amplitude * r2
325+
}
326+
327+
test(generating_point: inner, dx: nearpoint_disp.x + 0.5, dy: nearpoint_disp.y + 0.5, dz: nearpoint_disp.z + 0.5)
328+
329+
// Testing shows that about 99.94% of samples are eliminated by this point
330+
331+
// The following loop is responsible for about 25% of the noise generator’s
332+
// runtime. While it is possible to unroll the rest of it, we run up against
333+
// diminishing returns.
256334
let kernel:[(r2:Double, cell_offsets:[(Int, Int, Int)])] =
257335
[
258-
(0.0 , [/*(0, 0, 0), */(-1, 0, 0), (0, -1, 0), (-1, -1, 0), (0, 0, -1), (-1, 0, -1), (0, -1, -1), (-1, -1, -1)]),
259-
(0.25, [(0, 0, 1), (-1, 0, 1), (0, -1, 1), (-1, -1, 1), (0, 1, 0), (-1, 1, 0), (1, 0, 0), (1, -1, 0),
260-
(0, 1, -1), (-1, 1, -1), (1, 0, -1), (1, -1, -1)]),
261-
(0.5 , [(0, 1, 1), (-1, 1, 1), (1, 0, 1), (1, -1, 1), (1, 1, 0), (1, 1, -1)]),
262-
(0.75, [(1, 1, 1)]),
336+
// (0.0 , [(-1, 0, 0), (0, -1, 0), (0, 0, -1), (-1, -1, 0), (-1, 0, -1), (0, -1, -1), (-1, -1, -1)]),
337+
// (0.25, [(1, 0, 0), (0, 1, 0), (0, 0, 1), (-1, 0, 1), (0, -1, 1), (-1, -1, 1), (-1, 1, 0), (1, -1, 0),
338+
// (0, 1, -1), (-1, 1, -1), (1, 0, -1), (1, -1, -1)]),
339+
// (0.5 , [(0, 1, 1), (1, 0, 1), (1, 1, 0), (-1, 1, 1), (1, -1, 1), (1, 1, -1)]),
340+
// (0.75, [(1, 1, 1)]),
263341
(1.0 , [(-2, 0, 0), (-2, -1, 0), (0, -2, 0), (-1, -2, 0), (-2, 0, -1), (-2, -1, -1), (0, -2, -1), (-1, -2, -1),
264342
(0, 0, -2), (-1, 0, -2), (0, -1, -2), (-1, -1, -2)]),
265343
(1.25, [(-2, 0, 1), (-2, -1, 1), (0, -2, 1), (-1, -2, 1), (-2, 1, 0), (1, -2, 0), (-2, 1, -1), (1, -2, -1),

0 commit comments

Comments
 (0)