From 31b4dd5740c6b415d39d2e8c6529f0bbb091ecaa Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 22:56:28 +1000 Subject: [PATCH 1/9] Add additional dependencies For efficient spatial querying --- Manifest-v1.11.toml | 34 +++++++++++++++++++- Project.toml | 4 +++ src/assessment_methods/assessment_methods.jl | 2 ++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Manifest-v1.11.toml b/Manifest-v1.11.toml index e98ef5a..854639d 100644 --- a/Manifest-v1.11.toml +++ b/Manifest-v1.11.toml @@ -2,7 +2,7 @@ julia_version = "1.11.5" manifest_format = "2.0" -project_hash = "1b41462503d8f69b259542ee8e43dbf1a256a059" +project_hash = "6fb79bd8da16ed2240c62044e56e825aaaef4a46" [[deps.ADTypes]] git-tree-sha1 = "e2478490447631aedba0823d4d7a80b2cc8cdb32" @@ -695,6 +695,26 @@ git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.10" +[[deps.ExtendableSparse]] +deps = ["DocStringExtensions", "ILUZero", "LinearAlgebra", "Printf", "SparseArrays", "Sparspak", "StaticArrays", "SuiteSparse", "Test"] +git-tree-sha1 = "c04d2c808f0b595dc3be5fa98d107213f81e77fb" +uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" +version = "1.7.1" + + [deps.ExtendableSparse.extensions] + ExtendableSparseAMGCLWrapExt = "AMGCLWrap" + ExtendableSparseAlgebraicMultigridExt = "AlgebraicMultigrid" + ExtendableSparseIncompleteLUExt = "IncompleteLU" + ExtendableSparseLinearSolveExt = "LinearSolve" + ExtendableSparsePardisoExt = "Pardiso" + + [deps.ExtendableSparse.weakdeps] + AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" + AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" + IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" + LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" + Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" + [[deps.Extents]] git-tree-sha1 = "063512a13dbe9c40d999c439268539aa552d1ae6" uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" @@ -1013,6 +1033,12 @@ git-tree-sha1 = "b3d8be712fbf9237935bde0ce9b5a736ae38fc34" uuid = "a51ab1cf-af8e-5615-a023-bc2c838bba6b" version = "76.2.0+0" +[[deps.ILUZero]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "b007cfc7f9bee9a958992d2301e9c5b63f332a90" +uuid = "88f59080-6952-5380-9ea5-54057fb9a43f" +version = "0.2.0" + [[deps.IfElse]] git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" @@ -2200,6 +2226,12 @@ deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" version = "1.11.0" +[[deps.Sparspak]] +deps = ["Libdl", "LinearAlgebra", "Logging", "OffsetArrays", "Printf", "SparseArrays", "Test"] +git-tree-sha1 = "8de8288e33a4b5b37b197cefda28974659988d11" +uuid = "e56a9233-b9d6-4f03-8d0f-1825330902ac" +version = "0.3.11" + [[deps.SpecialFunctions]] deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] git-tree-sha1 = "41852b8679f78c8d8961eeadc8f62cef861a52e3" diff --git a/Project.toml b/Project.toml index 4f912a8..6487f23 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +ExtendableSparse = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" FLoops = "cc61a311-1640-44b5-9fba-1b764f453329" GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" @@ -44,6 +45,7 @@ ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +SortTileRecursiveTree = "746ee33f-1797-42c2-866d-db2fce69d14d" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -57,10 +59,12 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" AWS = "1.92.0" AWSS3 = "0.11.2" DiskArrays = "0.4.9" +ExtendableSparse = "1.7.1" JSON3 = "1.14.2" Logging = "1.11.0" MKL_jll = "2024.2.0" NearestNeighbors = "0.4.20" Oxygen = "1.7.1" Random = "1.11.0" +SortTileRecursiveTree = "0.1.4" YAXArrays = "0.6" diff --git a/src/assessment_methods/assessment_methods.jl b/src/assessment_methods/assessment_methods.jl index 77a0307..ce58f5b 100644 --- a/src/assessment_methods/assessment_methods.jl +++ b/src/assessment_methods/assessment_methods.jl @@ -13,6 +13,8 @@ import GeoInterface as GI import GeoInterface.Wrappers as GIWrap import GeometryOps as GO import GeoDataFrames as GDF +import SortTileRecursiveTree as STRT +using ExtendableSparse include("apply_criteria.jl") include("best_fit_polygons.jl") From 05682671a5aeb05d2ba8bdda1ea261b97f6ea154 Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 23:01:45 +1000 Subject: [PATCH 2/9] Specifically load up valid extent for reef slopes File is needed to use as a template when writing out COGs. TODO: Fix up hard coded filename suffix. --- src/utility/regions_criteria_setup.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/utility/regions_criteria_setup.jl b/src/utility/regions_criteria_setup.jl index b57eef1..4ad76c2 100644 --- a/src/utility/regions_criteria_setup.jl +++ b/src/utility/regions_criteria_setup.jl @@ -215,6 +215,7 @@ corresponding layer in the RasterStack struct RegionalDataEntry region_id::String region_metadata::RegionMetadata + valid_extent::Rasters.Raster raster_stack::Rasters.RasterStack slope_table::DataFrame criteria::BoundedCriteriaDict @@ -222,6 +223,7 @@ struct RegionalDataEntry function RegionalDataEntry(; region_id::String, region_metadata::RegionMetadata, + valid_extent::Rasters.Raster, raster_stack::Rasters.RasterStack, slope_table::DataFrame, criteria::BoundedCriteriaDict @@ -303,7 +305,9 @@ struct RegionalDataEntry instantiated_criteria ) expected_criteria = length(expected_criteria_set) - return new(region_id, region_metadata, raster_stack, slope_table, criteria) + return new( + region_id, region_metadata, valid_extent, raster_stack, slope_table, criteria + ) end end @@ -606,7 +610,9 @@ function initialise_data(config::Dict)::RegionalData ) # Add coordinate columns for spatial referencing - add_lat_long_columns_to_dataframe(slope_table) + if "lons" ∉ names(slope_table) + add_lat_long_columns_to_dataframe(slope_table) + end # Filter criteria list to only those available for this region available_criteria::Vector{String} = region_metadata.available_criteria @@ -652,10 +658,16 @@ function initialise_data(config::Dict)::RegionalData ) raster_stack = RasterStack(data_paths; name=data_names, lazy=true) + extent_path = joinpath( + data_source_directory, "$(region_metadata.id)_valid_slopes.tif" + ) + valid_extent = Raster(extent_path; lazy=true) + # Store complete regional data entry regional_data[region_metadata.id] = RegionalDataEntry(; region_id=region_metadata.id, region_metadata, + valid_extent, raster_stack, slope_table, criteria=bounds From 5190d220e186ec76ca24b8392d787283350e12bf Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 23:09:35 +1000 Subject: [PATCH 3/9] Update regional assessment to new improved approach Based on an arbitrary threshold of estimated raster size, use a regular in-memory matrix (< 700MB) or a Extendable Sparse Matrix (to reduce memory use, at the cost of higher COG write times). --- src/assessment_methods/apply_criteria.jl | 20 +++-- src/assessment_methods/site_identification.jl | 84 +++++++++---------- 2 files changed, 53 insertions(+), 51 deletions(-) diff --git a/src/assessment_methods/apply_criteria.jl b/src/assessment_methods/apply_criteria.jl index 7ceb142..f7948d1 100644 --- a/src/assessment_methods/apply_criteria.jl +++ b/src/assessment_methods/apply_criteria.jl @@ -64,21 +64,27 @@ function filter_raster_by_criteria( end """ -Filters the slope table (which contains raster param values too) by building a +Filters a lookup table (which contains raster param values too) by building a bit mask AND'd for all thresholds + +# Arguments +- `lookup` : A lookup table to filter +- `ruleset` : Vector of `CriteriaBounds` to filter with + +# Returns +BitVector, of pixels that meet criteria """ function filter_lookup_table_by_criteria( - slope_table::DataFrame, + lookup::DataFrame, ruleset::Vector{CriteriaBounds} -)::DataFrame - slope_table.all_crit .= 1 +)::BitVector + matches::BitVector = fill(true, nrow(lookup)) for threshold in ruleset - slope_table.all_crit = - slope_table.all_crit .& threshold.rule(slope_table[!, threshold.name]) + matches = matches .& threshold.rule(lookup[!, threshold.name]) end - return slope_table[BitVector(slope_table.all_crit), :] + return matches end """ diff --git a/src/assessment_methods/site_identification.jl b/src/assessment_methods/site_identification.jl index f63319e..659361e 100644 --- a/src/assessment_methods/site_identification.jl +++ b/src/assessment_methods/site_identification.jl @@ -45,7 +45,9 @@ function proportion_suitable( end """ - assess_region(reg_assess_data, reg, qp, rtype) + assess_region( + params::RegionalAssessmentParameters + )::Raster Perform raster suitability assessment based on user-defined criteria. @@ -56,75 +58,69 @@ Perform raster suitability assessment based on user-defined criteria. GeoTiff file of surrounding hectare suitability (1-100%) based on the criteria bounds input by a user. """ -function assess_region(params::RegionalAssessmentParameters)::Raster +function assess_region( + params::RegionalAssessmentParameters +)::Raster # Make mask of suitable locations @debug "$(now()) : Creating mask for region" + # Estimate size of mask (in MBs) + lookup_tbl = params.region_data.slope_table + region_dims = maximum(lookup_tbl.lon_idx), maximum(lookup_tbl.lat_idx) + mask_size_MB = prod(region_dims) / 1024^2 + + # Arbitrary threshold - use sparse matrices if likely to exceed memory + if mask_size_MB < 700 + @debug "$(now()) : Creating mask as a regular matrix (est. size: $(mask_size_MB))" + indicator = zeros( + Int8, maximum(lookup_tbl.lon_idx), maximum(lookup_tbl.lat_idx) + ) + else + @debug "$(now()) : Creating mask as a sparse matrix (est. size: $(mask_size_MB))" + indicator = ExtendableSparseMatrix{Int8,Int64}( + maximum(lookup_tbl.lon_idx), maximum(lookup_tbl.lat_idx) + ) + end + # Builds out a set of criteria filters using the regional criteria. # NOTE this will only filter over available criteria filters = build_criteria_bounds_from_regional_criteria(params.regional_criteria) - # map our regional criteria - @debug "Applying criteria thresholds to generate mask layer" - mask_data = filter_raster_by_criteria( - # This is the raster stack - params.region_data.raster_stack, - # The slope table dataframe - params.region_data.slope_table, - # The list of criteria bounds - filters - ) - - # Assess remaining pixels for their suitability - @debug "$(now()) : Calculating proportional suitability score" - suitability_scores = proportion_suitable(mask_data.data) + @debug "$(now()) : Applying criteria thresholds to generate mask layer" + matching_idx = filter_lookup_table_by_criteria(lookup_tbl, filters) + for r in eachrow(lookup_tbl[matching_idx, :]) + indicator[r.lon_idx, r.lat_idx] = true + end - @debug "$(now()) : Rebuilding raster and returning results" - return rebuild(mask_data, suitability_scores) + return Raster(params.region_data.valid_extent; data=indicator) end -function assess_sites( - params::SuitabilityAssessmentParameters, - regional_raster::Raster -) +function assess_sites(params::SuitabilityAssessmentParameters) target_crs = convert(EPSG, crs(regional_raster)) suitability_threshold = params.suitability_threshold region = params.region - @debug "$(now()) : Identifying search pixels for $(region)" - target_locs = lookup_df_from_raster(regional_raster, suitability_threshold) - - if size(target_locs, 1) == 0 - # No viable set of locations, return empty dataframe - return DataFrame(; - score=[], - orientation=[], - qc_flag=[], - geometry=[] - ) - end - - # Otherwise, create the file @debug "$(now()) : Assessing criteria table for $(region)" # Get criteria bounds list from criteria filters = build_criteria_bounds_from_regional_criteria(params.regional_criteria) - crit_pixels::DataFrame = filter_lookup_table_by_criteria( - # Slope table - params.region_data.slope_table, + slope_table = params.region_data.slope_table + matching_idx::BitVector = filter_lookup_table_by_criteria( + slope_table, filters ) res = abs(step(dims(regional_raster, X))) - @debug "$(now()) : Assessing $(size(target_locs, 1)) candidate locations in $(region)." + @debug "$(now()) : Assessing $(count(matching_idx)) candidate locations in $(region)." @debug "Finding optimal site alignment" initial_polygons = find_optimal_site_alignment( - crit_pixels, - target_locs, + slope_table[matching_idx, :], res, params.x_dist, - params.y_dist, - target_crs + params.y_dist; + target_crs=target_crs, + degree_step=20.0, + suit_threshold=suitability_threshold ) return initial_polygons From b5d95dd2f6c2eff6805bfd848fec5ec5dce3fe96 Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 23:10:10 +1000 Subject: [PATCH 4/9] Update site assessment to improved approach Rather than relying on index-based search of rasters or coordinate-based filtering, apply Sort-Tile-Recursive Trees (based on R-tree indexing) for quick and efficient filtering. --- src/assessment_methods/best_fit_polygons.jl | 96 +++++++++------------ src/assessment_methods/common_functions.jl | 38 ++++---- src/job_worker/handlers.jl | 32 +------ 3 files changed, 60 insertions(+), 106 deletions(-) diff --git a/src/assessment_methods/best_fit_polygons.jl b/src/assessment_methods/best_fit_polygons.jl index efb7f39..42510db 100644 --- a/src/assessment_methods/best_fit_polygons.jl +++ b/src/assessment_methods/best_fit_polygons.jl @@ -1,6 +1,5 @@ """Geometry-based assessment methods.""" - # Tabular data assessment methods """ @@ -353,13 +352,11 @@ end """ find_optimal_site_alignment( - env_lookup::DataFrame, - search_pixels::DataFrame, + lookup_tbl::DataFrame, res::Float64, x_dist::Union{Int64,Float64}, - y_dist::Union{Int64,Float64}, - target_crs::GeoFormatTypes.EPSG; - degree_step::Float64=20.0, + y_dist::Union{Int64,Float64}; + degree_step::Float64=10.0, suit_threshold::Float64=0.7 )::DataFrame @@ -370,12 +367,10 @@ is identified and used for searching with custom rotation parameters. Method is currently opperating for CRS in degrees units. # Arguments -- `env_lookup` : DataFrame containing environmental variables for assessment. -- `search_pixels` : DataFrame containing lon and lat columns for each pixel that is intended for analysis. +- `lookup_tbl` : DataFrame containing environmental variables for assessment. - `res` : Resolution of the original raster pixels. Can by found via `abs(step(dims(raster, X)))`. - `x_dist` : Length of horizontal side of search box (in meters). - `y_dist` : Length of vertical side of search box (in meters). -- `target_crs` : CRS of the input Rasters. Using GeoFormatTypes.EPSG(). - `degree_step` : Degree to perform rotations around identified edge angle. - `suit_threshold` : Theshold used to skip searching where the proportion of suitable pixels is too low. @@ -383,12 +378,11 @@ Method is currently opperating for CRS in degrees units. DataFrame containing highest score, rotation and polygon for each assessment at pixels in indices. """ function find_optimal_site_alignment( - env_lookup::DataFrame, - search_pixels::DataFrame, + lookup_tbl::DataFrame, res::Float64, x_dist::Union{Int64,Float64}, - y_dist::Union{Int64,Float64}, - target_crs::GeoFormatTypes.EPSG; + y_dist::Union{Int64,Float64}; + target_crs=EPSG(4326), degree_step::Float64=10.0, suit_threshold::Float64=0.7 )::DataFrame @@ -412,24 +406,31 @@ function find_optimal_site_alignment( # Create KD-tree to identify pixels for assessment that are close to each other. # We can then apply a heuristic to avoid near-identical assessments. - sp_inds = Tuple.(search_pixels.indices) - inds = Matrix{Float32}([first.(sp_inds) last.(sp_inds)]') + @debug "$(now()) : Applying KD-tree filtering on $(nrow(lookup_tbl)) locations" + inds = Matrix{Float32}([lookup_tbl.lons lookup_tbl.lats]') kdtree = KDTree(inds; leafsize=25) - ignore_idx = Int64[] - for (i, coords) in enumerate(eachcol(inds)) + t_ignore_idx = Dict(x => Int64[] for x in Threads.threadpooltids(:default)) + Threads.@threads for i in 1:size(inds, 2) + ignore_idx = t_ignore_idx[Threads.threadid()] if i in ignore_idx continue end + coords = inds[:, i] + # If there are a group of pixels close to each other, only assess the one closest to # the center of the group. - # Select pixels within ~22 meters (1 pixel = 10m² ∴ 3.3 ≈ 33m horizontal distance) - idx, dists = knn(kdtree, coords, 30) # retrieve 30 closest locations - sel = (dists == 0) .| (dists .< 3.3) # select current pixel and those ~33m away - xs = mean(inds[1, idx[sel]]) - ys = mean(inds[2, idx[sel]]) + idx, dists = knn(kdtree, coords, 60) # retrieve 60 closest locations + + # Select current pixel and those ~50% of max shape length away + x_d = meters_to_degrees(x_dist, coords[2]) + y_d = meters_to_degrees(y_dist, coords[2]) + max_l = max(x_d, y_d) * 0.5 + sel = (dists .<= max_l) # Find index of pixel closest to the center of the group + xs = mean(inds[1, idx[sel]]) + ys = mean(inds[2, idx[sel]]) closest_idx = argmin( map(p2 -> sum((p2 .- (xs, ys)) .^ 2), eachcol(inds[:, idx[sel]])) ) @@ -440,48 +441,35 @@ function find_optimal_site_alignment( append!(ignore_idx, to_ignore) end + # Create search tree + tree = STRT.STRtree(lookup_tbl.geometry) + # Search each location to assess - ignore_idx = unique(ignore_idx) - assessment_locs = search_pixels[Not(ignore_idx), :] + ignore_locs = unique(vcat(values(t_ignore_idx)...)) + assessment_locs = lookup_tbl[Not(ignore_locs), :] n_pixels = nrow(assessment_locs) - @debug "$(now()) : KD-tree filtering - removed $(length(ignore_idx)) near-identical locations, now assessing $(n_pixels) locations" + @debug "$(now()) : KD-tree filtering - removed $(length(ignore_locs)) near-identical locations, now assessing $(n_pixels) locations" best_score = zeros(n_pixels) - best_poly = Vector(undef, n_pixels) + best_poly = Vector{GI.Wrappers.Polygon}(undef, n_pixels) best_rotation = zeros(Int64, n_pixels) quality_flag = zeros(Int64, n_pixels) - max_x_y_dist = maximum([x_dist, y_dist]) + FLoops.assistant(false) @floop for (i, pix) in enumerate(eachrow(assessment_locs)) lon = pix.lons lat = pix.lats - rotated_copy::Vector{GI.Wrappers.Polygon} = move_geom.( - rotated_geoms, - Ref((lon, lat)) - ) + rotated_copy::Vector{GI.Wrappers.Polygon} = + move_geom.( + rotated_geoms, + Ref((lon, lat)) + ) - max_offset = ( - abs(meters_to_degrees(max_x_y_dist * 0.5, lat)) + - (2.0 * res) - ) - - bounds = Float64[ - lon - max_offset, - lon + max_offset, - lat - max_offset, - lat + max_offset - ] - - # Identify relevant pixels to assess - loc_constraint = ( - (env_lookup.lons .>= bounds[1]) .& # left - (env_lookup.lons .<= bounds[2]) .& # right - (env_lookup.lats .>= bounds[3]) .& # bottom - (env_lookup.lats .<= bounds[4]) # top - ) - rel_pix = env_lookup[loc_constraint, :] - n_matches = nrow(rel_pix) + # Find pixels within each rotated search area + in_pixels = unique(reduce(vcat, STRT.query.(Ref(tree), rotated_copy))) + relevant_pixels = lookup_tbl[in_pixels, :] + n_matches = nrow(relevant_pixels) # Skip if no relevant pixels or if the number of suitable pixels are below required # threshold @@ -494,9 +482,9 @@ function find_optimal_site_alignment( end best_score[i], best_rotation[i], best_poly[i], quality_flag[i] = assess_reef_site( - rel_pix, + relevant_pixels, rotated_copy, - 10.0 + suit_threshold ) end diff --git a/src/assessment_methods/common_functions.jl b/src/assessment_methods/common_functions.jl index 71fc4d3..a46ce0a 100644 --- a/src/assessment_methods/common_functions.jl +++ b/src/assessment_methods/common_functions.jl @@ -209,53 +209,50 @@ Where site polygons are overlapping, keep only the highest scoring site polygon. # Arguments - `res_df` : Results DataFrame containing potential site polygons - (output from `identify_potential_sites()` or `identify_edge_aligned_sites()`). + (output from `find_optimal_site_alignment()`). # Returns DataFrame containing only the highest scoring sites where site polygons intersect, and -containing only sites with scores greater than the `surr_threshold` specified in -`identify_edge_aligned_sites()` (default=0.6). +containing only sites with scores greater than a suitability threshold. +Sites with values below the suitability threshold are marked with QC flag = 1. """ function filter_sites(res_df::DataFrame)::DataFrame if nrow(res_df) == 0 return res_df end - res_df.row_ID = 1:size(res_df, 1) ignore_list = Int64[] - for row in eachrow(res_df) + # Remove entries that are marked to be below the threshold score + poly_set = res_df[res_df.qc_flag .!= 1, :] + + # Set unique IDs to ease search + poly_set.row_ID = 1:nrow(poly_set) + + # Create search tree + tree = STRT.STRtree(poly_set.geometry) + + for row in eachrow(poly_set) if row.row_ID ∈ ignore_list continue end - not_ignored = res_df.row_ID .∉ Ref(ignore_list) poly = row.geometry - poly_interx = GO.intersects.(Ref(poly), res_df[not_ignored, :geometry]) + poly_interx = STRT.query.(Ref(tree), Ref(poly)) - if count(poly_interx) > 1 - intersecting_polys = res_df[not_ignored, :][poly_interx, :] + if length(poly_interx) > 1 + intersecting_polys = poly_set[poly_interx, :] # Find the ID of the polygon with the best score. # Add polygon IDs with non-maximal score to the ignore list best_poly_idx = argmax(intersecting_polys.score) - best_score = intersecting_polys[best_poly_idx, :score] - if best_score < 0.7 - # Ignore everything as they are below useful threshold - append!(ignore_list, intersecting_polys[:, :row_ID]) - continue - end - best_ID = intersecting_polys[best_poly_idx, :row_ID] not_best_ID = intersecting_polys.row_ID .!= best_ID append!(ignore_list, intersecting_polys[not_best_ID, :row_ID]) - elseif count(poly_interx) == 1 && (row.score < 0.7) - # Remove current poly if below useful threshold - append!(ignore_list, Ref(row.row_ID)) end end - return res_df[res_df.row_ID .∉ [unique(ignore_list)], :] + return poly_set[poly_set.row_ID .∉ [unique(ignore_list)], :] end """ @@ -281,7 +278,6 @@ function output_geojson( return nothing end - """ buffer_simplify( gdf::DataFrame; diff --git a/src/job_worker/handlers.jl b/src/job_worker/handlers.jl index deba39f..55d0ffe 100644 --- a/src/job_worker/handlers.jl +++ b/src/job_worker/handlers.jl @@ -395,41 +395,11 @@ function handle_job( ) @info "Done compiling parameters" - @debug "Converting suitability job into regional job for regional assessment" - regional_params = regional_params_from_suitability_params(params) - @debug "Conversion complete" - - @info "Performing regional assessment" - regional_assessment_fn = build_regional_assessment_file_path( - regional_params; ext="tiff", config=config - ) - @debug "COG File name: $(regional_assessment_fn)" - - if !isfile(regional_assessment_fn) - @debug "File system cache was not hit for this task" - @debug "Assessing region $(params.region)" - regional_raster = assess_region(regional_params) - - @debug now() "Writing COG of regional assessment to $(regional_assessment_fn)" - _write_cog(regional_assessment_fn, regional_raster, config) - @debug now() "Finished writing cog " - else - @info "Cache hit - skipping regional assessment process..." - @debug "Pulling out raster from cache" - regional_raster = Raster(regional_assessment_fn; missingval=0, lazy=true) - end - @debug "Performing site assessment" best_sites = filter_sites( - assess_sites( - params, - regional_raster - ) + assess_sites(params) ) - # Specifically clear from memory to invoke garbage collector - regional_raster = nothing - @debug "Writing to temporary file" geojson_name = "$(tempname()).geojson" @debug "File name $(geojson_name)" From 000c6e0e3d5d9146b56a6679c2748ea0e237c9de Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 23:15:20 +1000 Subject: [PATCH 5/9] auto-format --- src/job_worker/handlers.jl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/job_worker/handlers.jl b/src/job_worker/handlers.jl index 55d0ffe..84a11bf 100644 --- a/src/job_worker/handlers.jl +++ b/src/job_worker/handlers.jl @@ -181,9 +181,9 @@ function process_job( end # -# ================= -# TEST -# ================= +# ================= +# TEST +# ================= # """ @@ -225,7 +225,7 @@ end # # =================== -# REGIONAL_ASSESSMENT +# REGIONAL_ASSESSMENT # =================== # @@ -266,7 +266,7 @@ Handler for REGIONAL_ASSESSMENT jobs struct RegionalAssessmentHandler <: AbstractJobHandler end """ -Handler for the regional assessment job. +Handler for the regional assessment job. """ function handle_job( ::RegionalAssessmentHandler, input::RegionalAssessmentInput, @@ -290,7 +290,9 @@ function handle_job( @info "Done compiling parameters" @info "Performing regional assessment" - regional_assessment_filename = build_regional_assessment_file_path(params; ext="tiff", config) + regional_assessment_filename = build_regional_assessment_file_path( + params; ext="tiff", config + ) @debug "COG File name: $(regional_assessment_filename)" if !isfile(regional_assessment_filename) @@ -305,7 +307,7 @@ function handle_job( @info "Cache hit - skipping regional assessment process and re-uploading to output!" end - # Now upload this to s3 + # Now upload this to s3 client = S3StorageClient(; region=context.aws_region) # Output file names @@ -325,7 +327,7 @@ end # # ====================== -# SUITABILITY_ASSESSMENT +# SUITABILITY_ASSESSMENT # ====================== # @@ -373,7 +375,7 @@ Handler for SUITABILITY_ASSESSMENT jobs struct SuitabilityAssessmentHandler <: AbstractJobHandler end """ -Handler for the suitability assessment job. +Handler for the suitability assessment job. """ function handle_job( ::SuitabilityAssessmentHandler, input::SuitabilityAssessmentInput, @@ -412,7 +414,7 @@ function handle_job( output_geojson(geojson_name, best_sites) end - # Now upload this to s3 + # Now upload this to s3 client = S3StorageClient(; region=context.aws_region) # Output file names From 7df8df6bb92015b96bea55f87184491ca186ba47 Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Thu, 5 Jun 2025 23:21:59 +1000 Subject: [PATCH 6/9] Specify missing value --- src/assessment_methods/site_identification.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assessment_methods/site_identification.jl b/src/assessment_methods/site_identification.jl index 659361e..fa4a739 100644 --- a/src/assessment_methods/site_identification.jl +++ b/src/assessment_methods/site_identification.jl @@ -92,7 +92,7 @@ function assess_region( indicator[r.lon_idx, r.lat_idx] = true end - return Raster(params.region_data.valid_extent; data=indicator) + return Raster(params.region_data.valid_extent; data=indicator, missingval=0) end function assess_sites(params::SuitabilityAssessmentParameters) From 042c1fcf12f3fba1ea25cdaf86c1853ff1b3dffb Mon Sep 17 00:00:00 2001 From: Peter Baker Date: Fri, 6 Jun 2025 16:41:55 +1000 Subject: [PATCH 7/9] Removing unused method, and moving const to proper spot Signed-off-by: Peter Baker --- src/utility/assessment_interfaces.jl | 14 -------------- src/utility/regions_criteria_setup.jl | 3 ++- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/utility/assessment_interfaces.jl b/src/utility/assessment_interfaces.jl index 6771dd4..7364cb2 100644 --- a/src/utility/assessment_interfaces.jl +++ b/src/utility/assessment_interfaces.jl @@ -222,17 +222,3 @@ function build_regional_assessment_file_path( return file_path end - - -""" -Converts parameters from a suitability assessment into a regional assessment -""" -function regional_params_from_suitability_params( - suitability_params::SuitabilityAssessmentParameters -)::RegionalAssessmentParameters - return RegionalAssessmentParameters(; - region=suitability_params.region, - regional_criteria=suitability_params.regional_criteria, - region_data=suitability_params.region_data, - ) -end diff --git a/src/utility/regions_criteria_setup.jl b/src/utility/regions_criteria_setup.jl index 4ad76c2..7c61b27 100644 --- a/src/utility/regions_criteria_setup.jl +++ b/src/utility/regions_criteria_setup.jl @@ -4,6 +4,7 @@ const REGIONAL_DATA_CACHE_FILENAME = "regional_cache_v2.dat" const SLOPES_LOOKUP_SUFFIX = "_valid_slopes_lookup.parq" +const SLOPES_RASTER_SUFFIX = "_valid_slopes.tif" const DEFAULT_CANONICAL_REEFS_FILE_NAME = "rrap_canonical_outlines.gpkg" # ============================================================================= @@ -659,7 +660,7 @@ function initialise_data(config::Dict)::RegionalData raster_stack = RasterStack(data_paths; name=data_names, lazy=true) extent_path = joinpath( - data_source_directory, "$(region_metadata.id)_valid_slopes.tif" + data_source_directory, "$(region_metadata.id)$(SLOPES_RASTER_SUFFIX)" ) valid_extent = Raster(extent_path; lazy=true) From 2faff00aec80c15ac30784eab3b423cf9877c39a Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Fri, 6 Jun 2025 17:40:02 +1000 Subject: [PATCH 8/9] Reduce repeated max extraction We've already stored the dim values anyway --- src/assessment_methods/site_identification.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/assessment_methods/site_identification.jl b/src/assessment_methods/site_identification.jl index fa4a739..469dc22 100644 --- a/src/assessment_methods/site_identification.jl +++ b/src/assessment_methods/site_identification.jl @@ -72,14 +72,10 @@ function assess_region( # Arbitrary threshold - use sparse matrices if likely to exceed memory if mask_size_MB < 700 @debug "$(now()) : Creating mask as a regular matrix (est. size: $(mask_size_MB))" - indicator = zeros( - Int8, maximum(lookup_tbl.lon_idx), maximum(lookup_tbl.lat_idx) - ) + indicator = zeros(Int8, region_dims...) else @debug "$(now()) : Creating mask as a sparse matrix (est. size: $(mask_size_MB))" - indicator = ExtendableSparseMatrix{Int8,Int64}( - maximum(lookup_tbl.lon_idx), maximum(lookup_tbl.lat_idx) - ) + indicator = ExtendableSparseMatrix{Int8,Int64}(region_dims...) end # Builds out a set of criteria filters using the regional criteria. From 9183931fb7a7cadbcd487632bc65dd934b7d4ca7 Mon Sep 17 00:00:00 2001 From: Takuya Iwanaga Date: Fri, 6 Jun 2025 22:20:31 +1000 Subject: [PATCH 9/9] Format files Appease the great linter! --- src/assessment_methods/geom_ops.jl | 6 +++--- src/utility/assessment_interfaces.jl | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/assessment_methods/geom_ops.jl b/src/assessment_methods/geom_ops.jl index 9e0dfa2..302a09d 100644 --- a/src/assessment_methods/geom_ops.jl +++ b/src/assessment_methods/geom_ops.jl @@ -195,9 +195,9 @@ Vector containing tuples of coordinates for a horizontal line found within geom. function find_horizontal(geom::GIWrap.Polygon)::Vector{Tuple{Float64,Float64}} coords = collect(GI.coordinates(geom)...) first_coord = first(coords) - second_coord = coords[ - (getindex.(coords, 2) .∈ first_coord[2]) .&& (getindex.(coords, 1) .∉ first_coord[1]) - ] + lat_in_first = (getindex.(coords, 2) .∈ first_coord[2]) + lon_not_in_first = (getindex.(coords, 1) .∉ first_coord[1]) + second_coord = coords[lat_in_first .&& lon_not_in_first] return [tuple(first_coord...), tuple(first(second_coord)...)] end diff --git a/src/utility/assessment_interfaces.jl b/src/utility/assessment_interfaces.jl index 7364cb2..c8ee018 100644 --- a/src/utility/assessment_interfaces.jl +++ b/src/utility/assessment_interfaces.jl @@ -25,7 +25,7 @@ struct RegionalAssessmentParameters function RegionalAssessmentParameters(; region::String, regional_criteria::BoundedCriteriaDict, - region_data::RegionalDataEntry, + region_data::RegionalDataEntry ) return new(region, regional_criteria, region_data) end @@ -76,12 +76,12 @@ end """ Merge user-specified bounds with regional defaults. -Creates bounds using user values where provided, falling back to regional +Creates bounds using user values where provided, falling back to regional bounds for unspecified values. Returns nothing if regional criteria is not available. # Arguments - `user_min::OptionalValue{Float64}` : User-specified minimum value (optional) -- `user_max::OptionalValue{Float64}` : User-specified maximum value (optional) +- `user_max::OptionalValue{Float64}` : User-specified maximum value (optional) - `regional_criteria::OptionalValue{RegionalCriteriaEntry}` : Regional criteria with default bounds (optional) # Returns @@ -117,7 +117,6 @@ const PARAM_MAP::Dict{String,OptionalValue{Tuple{Symbol,Symbol}}} = Dict( "Rugosity" => (:rugosity_min, :rugosity_max) ) - """ Generate a deterministic hash string for RegionalAssessmentParameters.