Improved site and region assessment processes #72

Merged · 9 commits · Jun 10, 2025
34 changes: 33 additions & 1 deletion Manifest-v1.11.toml
@@ -2,7 +2,7 @@

julia_version = "1.11.5"
manifest_format = "2.0"
project_hash = "1b41462503d8f69b259542ee8e43dbf1a256a059"
project_hash = "6fb79bd8da16ed2240c62044e56e825aaaef4a46"

[[deps.ADTypes]]
git-tree-sha1 = "e2478490447631aedba0823d4d7a80b2cc8cdb32"
@@ -695,6 +695,26 @@ git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.10"

[[deps.ExtendableSparse]]
deps = ["DocStringExtensions", "ILUZero", "LinearAlgebra", "Printf", "SparseArrays", "Sparspak", "StaticArrays", "SuiteSparse", "Test"]
git-tree-sha1 = "c04d2c808f0b595dc3be5fa98d107213f81e77fb"
uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3"
version = "1.7.1"

[deps.ExtendableSparse.extensions]
ExtendableSparseAMGCLWrapExt = "AMGCLWrap"
ExtendableSparseAlgebraicMultigridExt = "AlgebraicMultigrid"
ExtendableSparseIncompleteLUExt = "IncompleteLU"
ExtendableSparseLinearSolveExt = "LinearSolve"
ExtendableSparsePardisoExt = "Pardiso"

[deps.ExtendableSparse.weakdeps]
AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288"
AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c"
IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"

[[deps.Extents]]
git-tree-sha1 = "063512a13dbe9c40d999c439268539aa552d1ae6"
uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910"
@@ -1013,6 +1033,12 @@ git-tree-sha1 = "b3d8be712fbf9237935bde0ce9b5a736ae38fc34"
uuid = "a51ab1cf-af8e-5615-a023-bc2c838bba6b"
version = "76.2.0+0"

[[deps.ILUZero]]
deps = ["LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "b007cfc7f9bee9a958992d2301e9c5b63f332a90"
uuid = "88f59080-6952-5380-9ea5-54057fb9a43f"
version = "0.2.0"

[[deps.IfElse]]
git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1"
uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
@@ -2200,6 +2226,12 @@ deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
version = "1.11.0"

[[deps.Sparspak]]
deps = ["Libdl", "LinearAlgebra", "Logging", "OffsetArrays", "Printf", "SparseArrays", "Test"]
git-tree-sha1 = "8de8288e33a4b5b37b197cefda28974659988d11"
uuid = "e56a9233-b9d6-4f03-8d0f-1825330902ac"
version = "0.3.11"

[[deps.SpecialFunctions]]
deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
git-tree-sha1 = "41852b8679f78c8d8961eeadc8f62cef861a52e3"
4 changes: 4 additions & 0 deletions Project.toml
@@ -13,6 +13,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
ExtendableSparse = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3"
FLoops = "cc61a311-1640-44b5-9fba-1b764f453329"
GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f"
GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
@@ -44,6 +45,7 @@ ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SortTileRecursiveTree = "746ee33f-1797-42c2-866d-db2fce69d14d"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -57,10 +59,12 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
AWS = "1.92.0"
AWSS3 = "0.11.2"
DiskArrays = "0.4.9"
ExtendableSparse = "1.7.1"
JSON3 = "1.14.2"
Logging = "1.11.0"
MKL_jll = "2024.2.0"
NearestNeighbors = "0.4.20"
Oxygen = "1.7.1"
Random = "1.11.0"
SortTileRecursiveTree = "0.1.4"
YAXArrays = "0.6"
20 changes: 13 additions & 7 deletions src/assessment_methods/apply_criteria.jl
@@ -64,21 +64,27 @@ function filter_raster_by_criteria(
end

"""
Filters the slope table (which contains raster param values too) by building a
Filters a lookup table (which contains raster param values too) by building a
bit mask AND'd for all thresholds

# Arguments
- `lookup` : A lookup table to filter
- `ruleset` : Vector of `CriteriaBounds` to filter with

# Returns
BitVector indicating which pixels meet all criteria
"""
function filter_lookup_table_by_criteria(
slope_table::DataFrame,
lookup::DataFrame,
ruleset::Vector{CriteriaBounds}
)::DataFrame
slope_table.all_crit .= 1
)::BitVector
matches::BitVector = fill(true, nrow(lookup))

for threshold in ruleset
slope_table.all_crit =
slope_table.all_crit .& threshold.rule(slope_table[!, threshold.name])
matches = matches .& threshold.rule(lookup[!, threshold.name])
end

return slope_table[BitVector(slope_table.all_crit), :]
return matches
end

"""
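
A minimal sketch of the bit-mask filtering pattern the new `filter_lookup_table_by_criteria` uses, for readers unfamiliar with it. `Bounds` and the criteria below are hypothetical stand-ins for the project's `CriteriaBounds`; only the AND-reduction over a `BitVector` mirrors the merged code.

```julia
using DataFrames

# Hypothetical stand-in for the project's CriteriaBounds:
# a column name plus an elementwise predicate for that column.
struct Bounds
    name::Symbol
    rule::Function
end

lookup = DataFrame(; depth=[-3.0, -8.0, -20.0], slope=[5.0, 12.0, 40.0])
ruleset = [
    Bounds(:depth, x -> -10.0 .<= x .<= -2.0),  # hypothetical depth window
    Bounds(:slope, x -> x .<= 15.0)             # hypothetical maximum slope
]

# AND a BitVector across all thresholds, as in the merged function
matches = trues(nrow(lookup))
for threshold in ruleset
    matches .&= threshold.rule(lookup[!, threshold.name])
end

lookup[matches, :]  # rows meeting every criterion
```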
2 changes: 2 additions & 0 deletions src/assessment_methods/assessment_methods.jl
@@ -13,6 +13,8 @@ import GeoInterface as GI
import GeoInterface.Wrappers as GIWrap
import GeometryOps as GO
import GeoDataFrames as GDF
import SortTileRecursiveTree as STRT
using ExtendableSparse

include("apply_criteria.jl")
include("best_fit_polygons.jl")
96 changes: 42 additions & 54 deletions src/assessment_methods/best_fit_polygons.jl
@@ -1,6 +1,5 @@
"""Geometry-based assessment methods."""


# Tabular data assessment methods

"""
@@ -353,13 +352,11 @@ end

"""
find_optimal_site_alignment(
env_lookup::DataFrame,
search_pixels::DataFrame,
lookup_tbl::DataFrame,
res::Float64,
x_dist::Union{Int64,Float64},
y_dist::Union{Int64,Float64},
target_crs::GeoFormatTypes.EPSG;
degree_step::Float64=20.0,
y_dist::Union{Int64,Float64};
degree_step::Float64=10.0,
suit_threshold::Float64=0.7
)::DataFrame

@@ -370,25 +367,22 @@ is identified and used for searching with custom rotation parameters.
The method currently operates on a CRS in degree units.

# Arguments
- `env_lookup` : DataFrame containing environmental variables for assessment.
- `search_pixels` : DataFrame containing lon and lat columns for each pixel that is intended for analysis.
- `lookup_tbl` : DataFrame containing environmental variables for assessment.
- `res` : Resolution of the original raster pixels. Can be found via `abs(step(dims(raster, X)))`.
- `x_dist` : Length of horizontal side of search box (in meters).
- `y_dist` : Length of vertical side of search box (in meters).
- `target_crs` : CRS of the input Rasters. Using GeoFormatTypes.EPSG().
- `degree_step` : Degree to perform rotations around identified edge angle.
- `suit_threshold` : Threshold used to skip searching where the proportion of suitable pixels is too low.

# Returns
DataFrame containing highest score, rotation and polygon for each assessment at pixels in indices.
"""
function find_optimal_site_alignment(
env_lookup::DataFrame,
search_pixels::DataFrame,
lookup_tbl::DataFrame,
res::Float64,
x_dist::Union{Int64,Float64},
y_dist::Union{Int64,Float64},
target_crs::GeoFormatTypes.EPSG;
y_dist::Union{Int64,Float64};
target_crs=EPSG(4326),
degree_step::Float64=10.0,
suit_threshold::Float64=0.7
)::DataFrame
@@ -412,24 +406,31 @@

# Create KD-tree to identify pixels for assessment that are close to each other.
# We can then apply a heuristic to avoid near-identical assessments.
sp_inds = Tuple.(search_pixels.indices)
inds = Matrix{Float32}([first.(sp_inds) last.(sp_inds)]')
@debug "$(now()) : Applying KD-tree filtering on $(nrow(lookup_tbl)) locations"
inds = Matrix{Float32}([lookup_tbl.lons lookup_tbl.lats]')
kdtree = KDTree(inds; leafsize=25)
ignore_idx = Int64[]
for (i, coords) in enumerate(eachcol(inds))
t_ignore_idx = Dict(x => Int64[] for x in Threads.threadpooltids(:default))
Threads.@threads for i in 1:size(inds, 2)
ignore_idx = t_ignore_idx[Threads.threadid()]
if i in ignore_idx
continue
end

coords = inds[:, i]

# If there are a group of pixels close to each other, only assess the one closest to
# the center of the group.
# Select pixels within ~22 meters (1 pixel = 10m² ∴ 3.3 ≈ 33m horizontal distance)
idx, dists = knn(kdtree, coords, 30) # retrieve 30 closest locations
sel = (dists == 0) .| (dists .< 3.3) # select current pixel and those ~33m away
xs = mean(inds[1, idx[sel]])
ys = mean(inds[2, idx[sel]])
idx, dists = knn(kdtree, coords, 60) # retrieve 60 closest locations

# Select current pixel and those ~50% of max shape length away
x_d = meters_to_degrees(x_dist, coords[2])
y_d = meters_to_degrees(y_dist, coords[2])
max_l = max(x_d, y_d) * 0.5
sel = (dists .<= max_l)

# Find index of pixel closest to the center of the group
xs = mean(inds[1, idx[sel]])
ys = mean(inds[2, idx[sel]])
closest_idx = argmin(
map(p2 -> sum((p2 .- (xs, ys)) .^ 2), eachcol(inds[:, idx[sel]]))
)
@@ -440,48 +441,35 @@
append!(ignore_idx, to_ignore)
end

# Create search tree
tree = STRT.STRtree(lookup_tbl.geometry)

# Search each location to assess
ignore_idx = unique(ignore_idx)
assessment_locs = search_pixels[Not(ignore_idx), :]
ignore_locs = unique(vcat(values(t_ignore_idx)...))
assessment_locs = lookup_tbl[Not(ignore_locs), :]
n_pixels = nrow(assessment_locs)
@debug "$(now()) : KD-tree filtering - removed $(length(ignore_idx)) near-identical locations, now assessing $(n_pixels) locations"
@debug "$(now()) : KD-tree filtering - removed $(length(ignore_locs)) near-identical locations, now assessing $(n_pixels) locations"

best_score = zeros(n_pixels)
best_poly = Vector(undef, n_pixels)
best_poly = Vector{GI.Wrappers.Polygon}(undef, n_pixels)
best_rotation = zeros(Int64, n_pixels)
quality_flag = zeros(Int64, n_pixels)
max_x_y_dist = maximum([x_dist, y_dist])

FLoops.assistant(false)
@floop for (i, pix) in enumerate(eachrow(assessment_locs))
lon = pix.lons
lat = pix.lats

rotated_copy::Vector{GI.Wrappers.Polygon} = move_geom.(
rotated_geoms,
Ref((lon, lat))
)
rotated_copy::Vector{GI.Wrappers.Polygon} =
move_geom.(
rotated_geoms,
Ref((lon, lat))
)

max_offset = (
abs(meters_to_degrees(max_x_y_dist * 0.5, lat)) +
(2.0 * res)
)

bounds = Float64[
lon - max_offset,
lon + max_offset,
lat - max_offset,
lat + max_offset
]

# Identify relevant pixels to assess
loc_constraint = (
(env_lookup.lons .>= bounds[1]) .& # left
(env_lookup.lons .<= bounds[2]) .& # right
(env_lookup.lats .>= bounds[3]) .& # bottom
(env_lookup.lats .<= bounds[4]) # top
)
rel_pix = env_lookup[loc_constraint, :]
n_matches = nrow(rel_pix)
# Find pixels within each rotated search area
in_pixels = unique(reduce(vcat, STRT.query.(Ref(tree), rotated_copy)))
relevant_pixels = lookup_tbl[in_pixels, :]
n_matches = nrow(relevant_pixels)

# Skip if there are no relevant pixels or if the number of suitable pixels is below the required
# threshold
@@ -494,9 +482,9 @@
end

best_score[i], best_rotation[i], best_poly[i], quality_flag[i] = assess_reef_site(
rel_pix,
relevant_pixels,
rotated_copy,
10.0
suit_threshold
)
end

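
To make the deduplication heuristic above easier to follow, here is a serial, self-contained sketch of the KD-tree grouping step: group pixels that fall within half the longest site dimension and assess only the one nearest the group centroid. The coordinates are fabricated and `meters_to_degrees` is a simplified stand-in for the project's helper; the merged code parallelises this with `Threads.@threads` and per-thread ignore lists, omitted here for clarity.

```julia
using NearestNeighbors
using Statistics

# Simplified stand-in for the project's meters_to_degrees helper
# (degrees of longitude per metre shrink with latitude).
meters_to_degrees(dist, lat) = dist / (111_320.0 * cosd(lat))

# Fabricated pixel centroids (lon, lat)
lons = -146.0 .+ 0.1 .* rand(500)
lats = -17.0 .+ 0.1 .* rand(500)
inds = Matrix{Float32}([lons lats]')

kdtree = KDTree(inds; leafsize=25)
x_dist, y_dist = 450.0, 150.0  # search-box dimensions in metres

ignore_idx = Int64[]
for i in 1:size(inds, 2)
    i in ignore_idx && continue
    coords = inds[:, i]

    # Retrieve nearby candidate pixels
    idx, dists = knn(kdtree, coords, 60)

    # Keep neighbours within ~50% of the longest search-box side
    max_l = 0.5 * max(
        meters_to_degrees(x_dist, coords[2]),
        meters_to_degrees(y_dist, coords[2])
    )
    sel = dists .<= max_l

    # Of each group, assess only the pixel closest to the group centroid
    xs = mean(inds[1, idx[sel]])
    ys = mean(inds[2, idx[sel]])
    closest = idx[sel][argmin(
        map(p -> sum((p .- (xs, ys)) .^ 2), eachcol(inds[:, idx[sel]]))
    )]
    append!(ignore_idx, setdiff(idx[sel], [closest]))
end
```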
38 changes: 17 additions & 21 deletions src/assessment_methods/common_functions.jl
@@ -209,53 +209,50 @@ Where site polygons are overlapping, keep only the highest scoring site polygon.

# Arguments
- `res_df` : Results DataFrame containing potential site polygons
(output from `identify_potential_sites()` or `identify_edge_aligned_sites()`).
(output from `find_optimal_site_alignment()`).

# Returns
DataFrame containing only the highest scoring sites where site polygons intersect, and
containing only sites with scores greater than the `surr_threshold` specified in
`identify_edge_aligned_sites()` (default=0.6).
containing only sites with scores greater than a suitability threshold.
Sites with values below the suitability threshold are marked with QC flag = 1.
"""
function filter_sites(res_df::DataFrame)::DataFrame
if nrow(res_df) == 0
return res_df
end

res_df.row_ID = 1:size(res_df, 1)
ignore_list = Int64[]

for row in eachrow(res_df)
# Remove entries that are marked to be below the threshold score
poly_set = res_df[res_df.qc_flag .!= 1, :]

# Set unique IDs to ease search
poly_set.row_ID = 1:nrow(poly_set)

# Create search tree
tree = STRT.STRtree(poly_set.geometry)

for row in eachrow(poly_set)
if row.row_ID ∈ ignore_list
continue
end

not_ignored = res_df.row_ID .∉ Ref(ignore_list)
poly = row.geometry
poly_interx = GO.intersects.(Ref(poly), res_df[not_ignored, :geometry])
poly_interx = STRT.query.(Ref(tree), Ref(poly))

if count(poly_interx) > 1
intersecting_polys = res_df[not_ignored, :][poly_interx, :]
if length(poly_interx) > 1
intersecting_polys = poly_set[poly_interx, :]

# Find the ID of the polygon with the best score.
# Add polygon IDs with non-maximal score to the ignore list
best_poly_idx = argmax(intersecting_polys.score)
best_score = intersecting_polys[best_poly_idx, :score]
if best_score < 0.7
# Ignore everything as they are below useful threshold
append!(ignore_list, intersecting_polys[:, :row_ID])
continue
end

best_ID = intersecting_polys[best_poly_idx, :row_ID]
not_best_ID = intersecting_polys.row_ID .!= best_ID
append!(ignore_list, intersecting_polys[not_best_ID, :row_ID])
elseif count(poly_interx) == 1 && (row.score < 0.7)
# Remove current poly if below useful threshold
append!(ignore_list, Ref(row.row_ID))
end
end

return res_df[res_df.row_ID .∉ [unique(ignore_list)], :]
return poly_set[poly_set.row_ID .∉ [unique(ignore_list)], :]
end

"""
@@ -281,7 +278,6 @@ function output_geojson(
return nothing
end


"""
buffer_simplify(
gdf::DataFrame;
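
A hedged sketch of the STRtree pattern `filter_sites` now relies on. As I understand `SortTileRecursiveTree`, `query` returns candidates whose bounding boxes overlap the query geometry's extent, so this sketch also shows an exact `GO.intersects` refinement as one possible follow-up before keeping the highest-scoring polygon per overlapping group. Geometries and scores are fabricated.

```julia
import GeoInterface as GI
import GeometryOps as GO
import SortTileRecursiveTree as STRT
using DataFrames

# Fabricated candidate sites: unit squares plus suitability scores
square(x, y) = GI.Polygon([[(x, y), (x + 1.0, y), (x + 1.0, y + 1.0), (x, y + 1.0), (x, y)]])
sites = DataFrame(;
    geometry=[square(0.0, 0.0), square(0.5, 0.5), square(5.0, 5.0)],
    score=[0.8, 0.9, 0.75]
)
sites.row_ID = 1:nrow(sites)

tree = STRT.STRtree(sites.geometry)

ignore = Int64[]
for row in eachrow(sites)
    row.row_ID in ignore && continue

    # Bounding-box candidates, refined with an exact intersection test
    cand = STRT.query(tree, row.geometry)
    hits = cand[GO.intersects.(Ref(row.geometry), sites.geometry[cand])]

    if length(hits) > 1
        group = sites[hits, :]
        best_ID = group.row_ID[argmax(group.score)]
        append!(ignore, setdiff(group.row_ID, [best_ID]))
    end
end

sites[sites.row_ID .∉ Ref(ignore), :]  # best-scoring site per overlapping group
```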
6 changes: 3 additions & 3 deletions src/assessment_methods/geom_ops.jl
@@ -195,9 +195,9 @@ Vector containing tuples of coordinates for a horizontal line found within geom.
function find_horizontal(geom::GIWrap.Polygon)::Vector{Tuple{Float64,Float64}}
coords = collect(GI.coordinates(geom)...)
first_coord = first(coords)
second_coord = coords[
(getindex.(coords, 2) .∈ first_coord[2]) .&& (getindex.(coords, 1) .∉ first_coord[1])
]
lat_in_first = (getindex.(coords, 2) .∈ first_coord[2])
lon_not_in_first = (getindex.(coords, 1) .∉ first_coord[1])
second_coord = coords[lat_in_first .&& lon_not_in_first]

return [tuple(first_coord...), tuple(first(second_coord)...)]
end
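
And a quick usage sketch for `find_horizontal` on a fabricated rectangle; the expected output is inferred from the docstring rather than taken from project tests.

```julia
import GeoInterface as GI

# Axis-aligned rectangle with a horizontal edge at the first vertex's latitude
rect = GI.Polygon([[
    (146.0, -17.0), (146.001, -17.0),
    (146.001, -16.999), (146.0, -16.999),
    (146.0, -17.0)
]])

find_horizontal(rect)
# Expected: [(146.0, -17.0), (146.001, -17.0)]
# i.e. the horizontal edge through the polygon's first coordinate.
```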