
Commit 24dc785

Merge pull request #32 from swiss-territorial-data-lab/ch/add_FP_EPT
Ch/add fp ept
2 parents 7780f7f + 21813cc

File tree: 9 files changed, +433 -130 lines

examples/mineral-extract-sites-detection/config_det.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ generate_tilesets.py:
   datasets:
     aoi_tiles: output_det/tiles.geojson
     image_source:
-      type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ
+      type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
       location: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/2020/3857/{z}/{x}/{y}.jpeg
   output_folder: output_det
   tile_size: 256 # per side, in pixels

examples/mineral-extract-sites-detection/config_trne.yaml

Lines changed: 16 additions & 7 deletions
@@ -2,25 +2,34 @@
 prepare_data.py:
   srs: EPSG:2056
   datasets:
-    shapefile: ./data/labels/tlm-hr-trn-topo.shp
+    shapefile: ./data/labels/tlm-hr-trn-topo.shp # GT labels
+    FP_shapefile: ./data/FP/FP_list.gpkg # FP labels
+    empty_tiles_aoi: ./data/AoI/AoI_2020.shp # AOI in which additional empty tiles can be selected. Only one 'empty_tiles' option can be selected
+    # empty_tiles_shp: .data/EPT/<SHPFILE> # Provided shpfile of selected empty tiles. Only one 'empty_tiles' option can be selected
   output_folder: ./output/output_trne
   zoom_level: 16
 
 # 2-Fetch of tiles and split into 3 datasets: train, test, validation
 generate_tilesets.py:
   debug_mode:
     enable: False # sample of tiles
-    nb_tiles_max: 500
+    nb_tiles_max: 1000
   working_directory: output
   datasets:
     aoi_tiles: output_trne/tiles.geojson
     ground_truth_labels: output_trne/labels.geojson
+    FP_labels: output_trne/FP.geojson
     image_source:
       type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
-      year: 2020 # supported values: 1. multi-year (tiles of different year), 2. <year> (i.e. 2020)
+      year: 2020
       location: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/{year}/3857/{z}/{x}/{y}.jpeg
+  empty_tiles: # add empty tiles to datasets
+    tiles_frac: 0.5 # fraction (relative to the number of tiles intersecting labels) of empty tiles to add
+    frac_trn: 0.75 # fraction of empty tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
+    keep_oth_tiles: False # keep tiles in oth dataset not intersecting oth labels
   output_folder: output_trne
   tile_size: 256 # per side, in pixels
+  seed: 42
   overwrite: True
   n_jobs: 10
   COCO_metadata:
@@ -57,7 +66,7 @@ make_detections.py:
     tst: COCO_tst.json
   detectron2_config_file: ../../detectron2_config_dqry.yaml # path relative to the working_folder
   model_weights:
-    pth_file: ./logs/model_final.pth # trained model minimising the validation loss curve, monitor the training process via tensorboard (tensorboard --logdir </logs>)
+    pth_file: ./logs/model_0000999.pth # trained model minimising the validation loss curve, monitor the training process via tensorboard (tensorboard --logdir </logs>)
   image_metadata_json: img_metadata.json
   rdp_simplification: # rdp = Ramer-Douglas-Peucker
     enabled: true
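For context, the new empty_tiles block steers how many label-free tiles enter the splits. The sketch below only illustrates the arithmetic implied by the tiles_frac and frac_trn comments; it is not the generate_tilesets.py implementation, and all counts are made up.

    # Illustration only: arithmetic implied by the empty_tiles comments (not generate_tilesets.py code)
    tiles_frac = 0.5      # empty tiles added, as a fraction of the tiles intersecting labels
    frac_trn = 0.75       # share of those empty tiles assigned to the trn dataset

    n_label_tiles = 200                        # hypothetical number of tiles intersecting labels
    n_empty = int(tiles_frac * n_label_tiles)  # 100 empty tiles to add
    n_trn = int(frac_trn * n_empty)            # 75 empty tiles go to trn
    n_tst = (n_empty - n_trn) // 2             # remaining 25 split in two: 12 to tst...
    n_val = n_empty - n_trn - n_tst            # ...and 13 to val
    print(n_empty, n_trn, n_tst, n_val)        # 100 75 12 13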
Binary file not shown.

examples/mineral-extract-sites-detection/prepare_data.py

Lines changed: 173 additions & 52 deletions
@@ -11,6 +11,7 @@
 import geopandas as gpd
 import morecantile
 import pandas as pd
+from shapely.geometry import Polygon
 
 sys.path.insert(0, '../..')
 from helpers.misc import format_logger
@@ -21,6 +22,14 @@
 
 
 def add_tile_id(row):
+    """Attribute tile id
+
+    Args:
+        row (DataFrame): row of a given df
+
+    Returns:
+        DataFrame: row with addition 'id' column
+    """
 
     re_search = re.search('(x=(?P<x>\d*), y=(?P<y>\d*), z=(?P<z>\d*))', row.title)
     if 'year' in row.keys():
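As a side note, the regex in add_tile_id pulls the x, y, z indices out of a tile title; a tiny standalone check (the title string is illustrative, not taken from the project data):

    import re

    title = 'XYZ tile Tile(x=34008, y=23080, z=16)'   # illustrative title format
    match = re.search(r'(x=(?P<x>\d*), y=(?P<y>\d*), z=(?P<z>\d*))', title)
    print(match.group('x'), match.group('y'), match.group('z'))   # 34008 23080 16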
@@ -31,15 +40,60 @@ def add_tile_id(row):
     return row
 
 
+def aoi_tiling(gdf):
+    """Tiling of an AoI
+
+    Args:
+        gdf (GeoDataFrame): gdf containing all the bbox boundary coordinates
+
+    Returns:
+        Geodataframe: gdf containing the tiles shape of the bbox of the AoI
+    """
+
+    # Grid definition
+    tms = morecantile.tms.get('WebMercatorQuad')    # epsg:3857
+
+    tiles_all = []
+    for boundary in gdf.itertuples():
+        coords = (boundary.minx, boundary.miny, boundary.maxx, boundary.maxy)
+        tiles = gpd.GeoDataFrame.from_features([tms.feature(x, projected=False) for x in tms.tiles(*coords, zooms=[ZOOM_LEVEL])])
+        tiles.set_crs(epsg=4326, inplace=True)
+        tiles_all.append(tiles)
+    tiles_all_gdf = gpd.GeoDataFrame(pd.concat(tiles_all, ignore_index=True))
+
+    return tiles_all_gdf
+
+
+def bbox(bounds):
+    """Get a vector bounding box of a 2D shape
+
+    Args:
+        bounds (array): minx, miny, maxx, maxy of the bounding box
+
+    Returns:
+        geometry (Polygon): polygon geometry of the bounding box
+    """
+
+    minx = bounds[0]
+    miny = bounds[1]
+    maxx = bounds[2]
+    maxy = bounds[3]
+
+    return Polygon([[minx, miny],
+                    [maxx, miny],
+                    [maxx, maxy],
+                    [minx, maxy]])
+
+
 if __name__ == "__main__":
 
     # Start chronometer
     tic = time.time()
     logger.info('Starting...')
 
     # Argument and parameter specification
-    parser = argparse.ArgumentParser(description='The script prepares the Mineral Extraction Sites dataset to be processed by the object-detector scripts')
-    parser.add_argument('config_file', type=str, help='Framework configuration file')
+    parser = argparse.ArgumentParser(description="The script prepares the Mineral Extraction Sites dataset to be processed by the object-detector scripts")
+    parser.add_argument('config_file', type=str, help="Framework configuration file")
     args = parser.parse_args()
 
     logger.info(f"Using {args.config_file} as config file.")
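The new aoi_tiling() and bbox() helpers wrap the morecantile grid logic that previously lived in the main block. Here is a self-contained sketch of the same tiling pattern, with a made-up AoI extent (not project data):

    # Standalone sketch of the tiling pattern used by aoi_tiling(); values are illustrative
    import geopandas as gpd
    import morecantile
    import pandas as pd
    from shapely.geometry import box

    ZOOM_LEVEL = 16
    tms = morecantile.tms.get('WebMercatorQuad')  # epsg:3857 grid

    # Small AoI in EPSG:4326 (made-up extent)
    aoi = gpd.GeoDataFrame(geometry=[box(6.6, 46.5, 6.7, 46.55)], crs='EPSG:4326')
    boundaries_df = aoi.bounds  # minx, miny, maxx, maxy per geometry

    tiles_all = []
    for b in boundaries_df.itertuples():
        features = [tms.feature(t, projected=False)
                    for t in tms.tiles(b.minx, b.miny, b.maxx, b.maxy, zooms=[ZOOM_LEVEL])]
        tiles_all.append(gpd.GeoDataFrame.from_features(features).set_crs(epsg=4326))

    tiles_gdf = gpd.GeoDataFrame(pd.concat(tiles_all, ignore_index=True))
    print(len(tiles_gdf), 'tiles at zoom', ZOOM_LEVEL)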
@@ -50,6 +104,19 @@ def add_tile_id(row):
     # Load input parameters
     OUTPUT_DIR = cfg['output_folder']
     SHPFILE = cfg['datasets']['shapefile']
+    FP_SHPFILE = cfg['datasets']['FP_shapefile'] if 'FP_shapefile' in cfg['datasets'].keys() else None
+    if 'empty_tiles_aoi' in cfg['datasets'].keys() and 'empty_tiles_shp' in cfg['datasets'].keys():
+        logger.error("Choose one option between providing an AoI shapefile ('empty_tiles_aoi') in which empty tiles will be selected and a shapefile with selected empty tiles ('empty_tiles_shp')")
+        sys.exit(1)
+    if 'empty_tiles_aoi' in cfg['datasets'].keys():
+        EPT_SHPFILE = cfg['datasets']['empty_tiles_aoi']
+        EPT = 'aoi'
+    elif 'empty_tiles_shp' in cfg['datasets'].keys():
+        EPT_SHPFILE = cfg['datasets']['empty_tiles_shp']
+        EPT = 'shp'
+    else:
+        EPT_SHPFILE = None
+        EPT = None
     ZOOM_LEVEL = cfg['zoom_level']
 
     # Create an output directory in case it doesn't exist
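In other words, the dataset section may name either an AoI from which empty tiles are picked or a ready-made set of empty tiles, but not both. Two made-up examples of how the keys resolve:

    # Illustration only; paths are made up
    datasets = {'shapefile': './data/labels/labels.shp', 'empty_tiles_aoi': './data/AoI/AoI_2020.shp'}
    # -> EPT_SHPFILE = './data/AoI/AoI_2020.shp', EPT = 'aoi'

    datasets = {'shapefile': './data/labels/labels.shp', 'empty_tiles_shp': './data/EPT/empty_tiles.shp'}
    # -> EPT_SHPFILE = './data/EPT/empty_tiles.shp', EPT = 'shp'

    # Providing both keys makes the script log an error and exit with status 1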
@@ -60,70 +127,124 @@ def add_tile_id(row):
 
     # Prepare the tiles
 
-    ## Convert datasets shapefiles into geojson format
-    logger.info('Convert labels shapefile into GeoJSON format (EPSG:4326)...')
+    # Convert datasets shapefiles into geojson format
+    logger.info("Convert labels shapefile into GeoJSON format (EPSG:4326)...")
     labels = gpd.read_file(SHPFILE)
     labels_4326 = labels.to_crs(epsg=4326)
     labels_4326['CATEGORY'] = 'quarry'
     labels_4326['SUPERCATEGORY'] = 'land usage'
 
     nb_labels = len(labels)
-    logger.info(f'There is/are {nb_labels} polygons in {SHPFILE}')
+    logger.info(f"There are {nb_labels} polygons in {SHPFILE}")
 
-    label_filename = 'labels.geojson'
-    label_filepath = os.path.join(OUTPUT_DIR, label_filename)
-    labels_4326.to_file(label_filepath, driver='GeoJSON')
-    written_files.append(label_filepath)
-    logger.success(f"{DONE_MSG} A file was written: {label_filepath}")
+    filename = 'labels.geojson'
+    filepath = os.path.join(OUTPUT_DIR, filename)
+    labels_4326.to_file(filepath, driver='GeoJSON')
+    written_files.append(filepath)
+    logger.success(f"{DONE_MSG} A file was written: {filepath}")
 
-    logger.info('Creating tiles for the Area of Interest (AoI)...')
-
-    # Grid definition
-    tms = morecantile.tms.get('WebMercatorQuad') # epsg:3857
+    # Add FP labels if it exists
+    if FP_SHPFILE:
+        fp_labels = gpd.read_file(FP_SHPFILE)
+        fp_labels_4326 = fp_labels.to_crs(epsg=4326)
+        fp_labels_4326['CATEGORY'] = 'quarry'
+        fp_labels_4326['SUPERCATEGORY'] = 'land usage'
+
+        nb_fp_labels = len(fp_labels)
+        logger.info(f"There are {nb_fp_labels} polygons in {FP_SHPFILE}")
+
+        filename = 'FP.geojson'
+        filepath = os.path.join(OUTPUT_DIR, filename)
+        fp_labels_4326.to_file(filepath, driver='GeoJSON')
+        written_files.append(filepath)
+        logger.success(f"{DONE_MSG} A file was written: {filepath}")
+
+        labels_4326 = pd.concat([labels_4326, fp_labels_4326], ignore_index=True)
+    else:
+        labels_4326 = labels_4326
+
+    # Keep only label boundary geometry info (minx, miny, maxx, maxy)
+    logger.info("- Get the label boundaries")
+    boundaries_df = labels_4326.bounds
+
+    # Get the boundaries for all the labels (minx, miny, maxx, maxy)
+    global_boundaries_gdf = labels_4326.dissolve() if len(labels_4326) > 0 else labels_4326
+    labels_bbox = bbox(global_boundaries_gdf.iloc[0].geometry.bounds)
 
-    # New gpd with only labels geometric info (minx, miny, maxx, maxy)
-    logger.info('- Get geometric boundaries of the labels')
-    label_boundaries_df = labels_4326.bounds
-
-    # Iterate on geometric coordinates to defined tiles for a given label at a given zoom level
-    # A gpd is created for each label and are then concatenate into a single gpd
-    logger.info('- Compute tiles for each labels geometry')
-    tiles_4326_all = []
-
-    for label_boundary in label_boundaries_df.itertuples():
-        coords = (label_boundary.minx, label_boundary.miny, label_boundary.maxx, label_boundary.maxy)
-        tiles_4326 = gpd.GeoDataFrame.from_features([tms.feature(x, projected=False) for x in tms.tiles(*coords, zooms=[ZOOM_LEVEL])])
-        tiles_4326.set_crs(epsg=4326, inplace=True)
-        tiles_4326_all.append(tiles_4326)
-    tiles_4326_aoi = gpd.GeoDataFrame(pd.concat(tiles_4326_all, ignore_index=True))
-
-    # Remove unrelevant tiles and reorganised the data set:
-    logger.info('- Remove duplicated tiles and tiles that are not intersecting labels')
-
-    # - Keep only tiles that are actually intersecting labels
-    labels_4326.rename(columns={'FID': 'id_aoi'}, inplace=True)
-    tiles_4326 = gpd.sjoin(tiles_4326_aoi, labels_4326, how='inner')
-
-    # - Remove duplicated tiles
-    if nb_labels > 1:
-        tiles_4326.drop_duplicates(['title', 'year'] if 'year' in tiles_4326.keys() else 'title', inplace=True)
-
-    # - Remove useless columns, reset feature id and redefine it according to xyz format
-    logger.info('- Add tile IDs and reorganise data set')
-    tiles_4326 = tiles_4326[['geometry', 'title', 'year'] if 'year' in tiles_4326.keys() else ['geometry', 'title']].copy()
-    tiles_4326.reset_index(drop=True, inplace=True)
+    # Get tiles for a given AoI from which empty tiles will be selected when the images are retrieved
+    if EPT_SHPFILE:
+        EPT_aoi = gpd.read_file(EPT_SHPFILE)
+        EPT_aoi_4326 = EPT_aoi.to_crs(epsg=4326)
+
+        if EPT == 'aoi':
+            logger.info("- Get AoI boundaries")
+            EPT_aoi_boundaries_df = EPT_aoi_4326.bounds
+
+            # Get the boundaries for all the AoI (minx, miny, maxx, maxy)
+            EPT_aoi_boundaries_gdf = EPT_aoi_4326.dissolve() if len(EPT_aoi_4326) > 0 else EPT_aoi_4326
+            aoi_bbox = bbox(EPT_aoi_boundaries_gdf.iloc[0].geometry.bounds)
+            aoi_bbox_contains = aoi_bbox.contains(labels_bbox)
+
+            if aoi_bbox_contains:
+                logger.info("- The surface area occupied by the bbox of the AoI used to find empty tiles is bigger than the label's one. The AoI boundaries will be used for tiling")
+                boundaries_df = EPT_aoi_boundaries_df
+            else:
+                logger.info("- The surface area occupied by the bbox of the AoI used to find empty tiles is smaller than the label's one. Both the AoI and labels area will be used for tiling")
+                # Get tiles coordinates and shapes
+                empty_tiles_4326_all = aoi_tiling(EPT_aoi_boundaries_df)
+                # Delete tiles outside of the AoI limits
+                empty_tiles_4326_aoi = gpd.sjoin(empty_tiles_4326_all, EPT_aoi_4326, how='inner', lsuffix='ept_tiles', rsuffix='ept_aoi')
+        elif EPT == 'shp':
+            empty_tiles_4326_aoi = EPT_aoi_4326
+            aoi_bbox = None
+            aoi_bbox_contains = False
+
+    logger.info("Creating tiles for the Area of Interest (AoI)...")
+
+    # Get tiles coordinates and shapes
+    tiles_4326_aoi = aoi_tiling(boundaries_df)
+
+    if EPT_SHPFILE and aoi_bbox_contains:
+        # Delete tiles outside of the AoI limits
+        tiles_4326_aoi = gpd.sjoin(tiles_4326_aoi, EPT_aoi_4326, how='inner', lsuffix='ept_tiles', rsuffix='ept_aoi', predicate='intersects')
+
+    # Compute labels intersecting tiles
+    tiles_gt_4326 = gpd.sjoin(tiles_4326_aoi, labels_4326, how='inner', predicate='intersects')
+    tiles_gt_4326.drop_duplicates('title', inplace=True)
+    logger.info(f"- Number of tiles intersecting GT labels = {len(tiles_gt_4326)}")
+    if FP_SHPFILE:
+        tiles_fp_4326 = gpd.sjoin(tiles_4326_aoi, fp_labels_4326, how='inner', predicate='intersects')
+        tiles_fp_4326.drop_duplicates('title', inplace=True)
+        logger.info(f"- Number of tiles intersecting FP labels = {len(tiles_fp_4326)}")
+
+    if not EPT_SHPFILE or EPT_SHPFILE and aoi_bbox_contains == False:
+        # Keep only tiles intersecting labels
+        tiles_4326_aoi = gpd.sjoin(tiles_4326_aoi, labels_4326, how='inner', predicate='intersects')
+        tiles_4326_aoi.drop_duplicates('title', inplace=True)
+
+    # Get all the tiles in one gdf
+    if EPT_SHPFILE and aoi_bbox_contains == False:
+        logger.info("- Add label tiles to empty AoI tiles")
+        tiles_4326_all = pd.concat([tiles_4326_aoi, empty_tiles_4326_aoi])
+    else:
+        tiles_4326_all = tiles_4326_aoi
 
-    # Add the ID column
-    tiles_4326 = tiles_4326.apply(add_tile_id, axis=1)
+    tiles_4326_all.drop_duplicates('title', inplace=True)
+
+    # Add tile IDs and reorganise data set
+    tiles_4326_all = tiles_4326_all[['geometry', 'title']].copy()
+    tiles_4326_all.reset_index(drop=True, inplace=True)
+    tiles_4326_all = tiles_4326_all.apply(add_tile_id, axis=1)
 
-    nb_tiles = len(tiles_4326)
-    logger.info(f'There was/were {nb_tiles} tiles created')
+    nb_tiles = len(tiles_4326_all)
+    logger.info(f"There were {nb_tiles} tiles created")
 
-    # Export tiles to GeoJSON
-    logger.info('Export tiles to GeoJSON (EPSG:4326)...')
+    # Save tile shapefile
+    logger.info("Export tiles to GeoJSON (EPSG:4326)...")
     tile_filename = 'tiles.geojson'
     tile_filepath = os.path.join(OUTPUT_DIR, tile_filename)
-    tiles_4326.to_file(tile_filepath, driver='GeoJSON')
+    tiles_4326_all.to_file(tile_filepath, driver='GeoJSON')
     written_files.append(tile_filepath)
     logger.success(f"{DONE_MSG} A file was written: {tile_filepath}")
 
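The tile selection above boils down to a spatial join plus de-duplication on the tile title, with empty tiles concatenated back in when requested. A minimal, self-contained illustration of that pattern with toy geometries (not project data):

    import geopandas as gpd
    import pandas as pd
    from shapely.geometry import box

    # Two toy tiles and one label overlapping only the first tile
    tiles = gpd.GeoDataFrame({'title': ['t1', 't2'],
                              'geometry': [box(0, 0, 1, 1), box(1, 0, 2, 1)]}, crs='EPSG:4326')
    labels = gpd.GeoDataFrame({'CATEGORY': ['quarry'],
                               'geometry': [box(0.2, 0.2, 0.8, 0.8)]}, crs='EPSG:4326')

    # Keep only tiles intersecting labels, then drop duplicates produced by multiple matches
    tiles_with_labels = gpd.sjoin(tiles, labels, how='inner', predicate='intersects')
    tiles_with_labels.drop_duplicates('title', inplace=True)
    print(list(tiles_with_labels.title))  # ['t1']

    # Empty tiles (here: the non-intersecting one) can then be concatenated back in
    empty_tiles = tiles[~tiles.title.isin(tiles_with_labels.title)]
    all_tiles = pd.concat([tiles_with_labels, empty_tiles]).drop_duplicates('title')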

examples/road-surface-classification/config_rs.yaml

Lines changed: 4 additions & 0 deletions
@@ -31,6 +31,10 @@ generate_tilesets.py:
       year: 2018
       location: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/{year}/3857/{z}/{x}/{y}.jpeg
       srs: "EPSG:3857"
+  # empty_tiles: # add empty tiles to datasets
+  #   tiles_frac: 0.5 # fraction (relative to the number of tiles intersecting labels) of empty tiles to add
+  #   frac_trn: 0.75 # fraction of empty tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
+  #   keep_oth_tiles: False # keep tiles in oth dataset not intersecting oth labels
   output_folder: .
   tile_size: 256 # per side, in pixels
   overwrite: False

examples/swimming-pool-detection/GE/config_GE.yaml

Lines changed: 6 additions & 2 deletions
@@ -9,16 +9,20 @@ prepare_data.py:
 generate_tilesets.py:
   debug_mode:
     enable: False # sample of tiles
-    nb_tiles_max: 100
+    nb_tiles_max: 1000
   working_directory: .
   datasets:
     aoi_tiles: output_GE/aoi_z18_tiles.geojson
     ground_truth_labels: output_GE/ground_truth_labels.geojson
     other_labels: output_GE/other_labels.geojson
     image_source:
-      type: MIL # supported values: 1. MIL = Map Image Layer 2. WMS
+      type: MIL # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
       location: https://raster.sitg.ge.ch/arcgis/rest/services/ORTHOPHOTOS_2018_EPSG2056/MapServer
       srs: "EPSG:3857"
+  # empty_tiles: # add empty tiles to datasets
+  #   tiles_frac: 0.5 # fraction (relative to the number of tiles intersecting labels) of empty tiles to add
+  #   frac_trn: 0.75 # fraction of empty tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
+  #   keep_oth_tiles: True # keep tiles in oth dataset not intersecting oth labels
   output_folder: output_GE
   tile_size: 256 # per side, in pixels
   overwrite: False

examples/swimming-pool-detection/GE/prepare_data.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 
 from loguru import logger
 
-sys.path.insert(0, '.')
+sys.path.insert(1, '../../..')
 from helpers.misc import format_logger
 
 logger = format_logger(logger)
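The path fix points the example at the repository root so that the shared helpers package resolves when the script is run from its own folder; roughly (directory layout assumed from the import, not verified against the repository):

    # Assumed layout: <repo>/helpers/misc.py and <repo>/examples/swimming-pool-detection/GE/prepare_data.py
    import sys
    sys.path.insert(1, '../../..')           # repository root, three levels above the GE example folder
    from helpers.misc import format_logger   # now importable when the script is launched from the GE folder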
