Skip to content
This repository was archived by the owner on Aug 25, 2025. It is now read-only.

Commit b6c18e3

Browse files
authored
Quality checking (#9)
* MOD: Remove pyproject.toml dependencies (using conda from now on) * FIX: Correct errors introduced in previous commits * MOD: Correct version in CITATION.cff * MOD: Correct Structure in README.md * MOD: -_- * MOD: Remove unused CLI * MOD: Load data as an xarray Dataset * WIP: Mockup of value assignation using xarray selection from a common Dataset * WIP: Still * WIP: lazy_concat not working * WIP: Trouble merging dataarrays into a Dataset * WIP: Turn program into a script type of code (use functions as a top bottom script) * MOD: Treat both buffer sizes in the loop (more efficient since the data is selected and merged each time) * FIX: Local hook doesn't work * FIX: Merge error was caused by the coordinates names * FIX: Created DataFrame with old columns * WIP: EPSG problems (loads 3857 but doesn't change it) * WIP: Projections error fixed * ADD: Functions get_bbox and merge_ds * DEL: Parameter loc (want to compute distance every time) * MOD: Test on the whole data, forgot comment * FIX: Column names when creating DataFrame * FIX: Anophelian diversity error * FIX: NDVI values not accessed through chunks * DEL: Remove unused functions * FIX: Typos * MOD: Rework get_pixel_count function to take advantage of xarray's capabilities * MOD: Rewrite landuse computation, fix typos * FIX: Typos * FIX: Retrieving length of landuse before pivoting the DataFrame * DEL: Remove function get_countries_profile * MOD: Compute the habitat diversity for 500 and 2000 meters buffers. Rework structure to not read the same files over again * MOD: Set datasets as a parameter instead of a config file value * FIX: Set values for multiple columns with iloc, replace Merge function by a clip function (only select data from bbox extent) * MOD: Remove unused code * MOD: Rename files * ADD: Export file to csv * DEL: Useless module * MOD: Merge code used only one time * MOD: Bump version number to 0.3.0 * MOD: Update metadata * Autoupdate every month instead of every day
1 parent 5ded032 commit b6c18e3

18 files changed

+449
-1526
lines changed

.github/workflows/pre-commit-autoupdate.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: Pre-commit auto-update
22
on:
33
schedule:
4-
- cron: '0 0 * * *'
4+
- cron: '0 0 0 * 0'
55
jobs:
66
auto-update:
77
runs-on: ubuntu-latest

.pre-commit-config.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,6 @@ repos:
33
hooks:
44
- id: check-hooks-apply
55
- id: check-useless-excludes
6-
- repo: local
7-
hooks:
8-
- id: pre-commit-autoupdate
9-
name: pre-commit-autoupdate
10-
entry: bash -c 'pre-commit autoupdate'
11-
language: system
12-
types: [python]
13-
pass_filenames: false
146
- repo: https://github.com/pre-commit/pre-commit-hooks
157
rev: v4.4.0
168
hooks:

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ license: "GPL-3.0"
1717
message: "If you use this software, please cite it using these metadata."
1818
repository-code: "https://github.com/pierre-manchon/INPMT/"
1919
title: INPMT
20-
version: "v0.2.0"
20+
version: "v0.2.2"
2121
...

INPMT/__main__.py

Lines changed: 41 additions & 295 deletions
Original file line numberDiff line numberDiff line change
@@ -17,318 +17,64 @@
1717
You should have received a copy of the GNU General Public License
1818
along with this program. If not, see <https://www.gnu.org/licenses/>.
1919
"""
20-
import argparse
2120
import os
2221
import warnings
23-
from argparse import ArgumentTypeError
24-
from shlex import quote as shlex_quote
25-
from sys import argv, exit, stderr
26-
from tempfile import TemporaryDirectory
27-
from typing import AnyStr
2822

29-
import numpy as np
23+
import rioxarray as rxr
24+
from pandas import DataFrame
3025

3126
try:
32-
from __processing import get_countries_profile, get_urban_profile
33-
from utils.utils import format_dataset_output, get_cfg_val, set_cfg_val
27+
from processing import get_urban_profile
3428
except ImportError:
35-
from INPMT.__processing import get_countries_profile, get_urban_profile
36-
from INPMT.utils.utils import (
37-
format_dataset_output,
38-
get_cfg_val,
39-
set_cfg_val,
40-
)
29+
from INPMT.processing import get_urban_profile
4130

4231
warnings.filterwarnings("ignore")
4332

4433

45-
def run(
46-
method: AnyStr = ("villages", "countries"),
47-
export_dir: AnyStr = "results",
48-
loc: bool = True,
49-
) -> None:
34+
def run(datasets: str) -> DataFrame:
5035
"""
5136
Retrieves the datasets path and executes the functions.
5237
For the countries, i only execute it like that.
5338
For the villages, i execute first after setting the buffer parameter to
5439
500, then i execute it after setting the
5540
parameter to 2000. Then i merge the two results.
5641
57-
:param loc:
58-
:param method: Execute whether by villages or countries
59-
:type method: AnyStr
60-
:param export_dir:
61-
:type export_dir: AnyStr
6242
:return: Nothing
6343
:rtype: None
6444
"""
65-
valid_method = ["countries", "villages"]
66-
if method not in valid_method:
67-
raise ValueError(
68-
f"Invalid method parameter. Expected one of {valid_method}"
69-
)
70-
datasets = get_cfg_val("datasets_storage_path")
71-
export = os.path.join(datasets, export_dir)
72-
73-
# Raster data
74-
population = os.path.join(datasets, "POPULATION_AFRICA_100m_reprj3857.tif")
75-
landuse = os.path.join(
76-
datasets, "LANDUSE_ESACCI-LC-L4-LC10-Map-300m-P1Y-2016-v1.0.tif"
77-
)
78-
ndvi = os.path.join(
79-
datasets, "NDVI_MOD13A1.006__500m_16_days_NDVI_doy2016_aid0001.tif"
80-
)
81-
swi = os.path.join(
82-
datasets, "SWI_c_gls_SWI10_QL_2016_AFRICA_ASCAT_V3.1.1_reprj3857.tif"
83-
)
84-
gws = os.path.join(datasets, "GWS_seasonality_AFRICA_reprj3857.tif")
85-
prevalence = os.path.join(
86-
datasets, "PREVALENCE_2019_Global_PfPR_2016_reprj3857.tif"
87-
)
88-
89-
# Vector data
90-
irish = os.path.join(datasets, "IRISH_countries.shp")
91-
landuse_polygonized = os.path.join(
92-
datasets, "LANDUSE_ESACCI-LC-L4-LC10-Map-300m-P1Y-2016-v1.0.shp"
93-
)
94-
anopheles_kyalo = os.path.join(datasets, "KYALO_VectorDB_1898-2016.shp")
95-
os.path.join(
96-
datasets, "KYALO_anopheles_in_PAs_buffers.shp"
97-
) # noqa F841
98-
os.path.join(
99-
datasets, "KYALO_anopheles_out_PAs_buffers.shp"
100-
)
101-
national_parks_with_anopheles_kyalo = os.path.join(
102-
datasets, "NATIONAL_PARKS_WDPA_Africa_anopheles.shp"
103-
)
104-
105-
anopheles_kyalo_all_buffered = os.path.join(datasets, "KYALO_anopheles_all_PAs_buffers.shp")
106-
107-
with TemporaryDirectory(prefix="INPMT_") as tmp_directory:
108-
# Read file as a geodataframe
109-
if method == "countries":
110-
gdf_profiles_aoi, path_profiles_aoi = get_countries_profile(
111-
aoi=irish,
112-
landuse=landuse,
113-
landuse_polygonized=landuse_polygonized,
114-
anopheles=anopheles_kyalo,
115-
processing_directory=tmp_directory,
116-
)
117-
# Retrieves the directory the dataset is in and joins it the output
118-
# filename
119-
_, _, output_profiles = format_dataset_output(
120-
dataset=export_dir, name="countries_profiles"
121-
)
122-
gdf_profiles_aoi.to_file(output_profiles)
123-
if method == "villages":
124-
set_cfg_val("buffer_villages", "500")
125-
profile_villages_500 = get_urban_profile(
126-
villages=anopheles_kyalo_all_buffered,
127-
parks=national_parks_with_anopheles_kyalo,
128-
landuse=landuse,
129-
population=population,
130-
ndvi=ndvi,
131-
swi=swi,
132-
gws=gws,
133-
prevalence=prevalence,
134-
processing_directory=tmp_directory,
135-
loc=loc,
136-
)
137-
set_cfg_val("buffer_villages", "2000")
138-
profile_villages_2000 = get_urban_profile(
139-
villages=anopheles_kyalo_all_buffered,
140-
parks=national_parks_with_anopheles_kyalo,
141-
landuse=landuse,
142-
population=population,
143-
ndvi=ndvi,
144-
swi=swi,
145-
gws=gws,
146-
prevalence=prevalence,
147-
processing_directory=tmp_directory,
148-
loc=loc,
149-
)
150-
151-
# https://stackoverflow.com/a/50865526
152-
# Merge the two dataframes in one (side by side) with the column
153-
# suffix
154-
profile_villages = profile_villages_500.reset_index(drop=True
155-
).merge(
156-
profile_villages_2000.reset_index(drop=True),
157-
left_index=True,
158-
right_index=True,
159-
suffixes=("_500", "_2000"),
160-
)
161-
# Rename and delete the duplicated columns
162-
profile_villages.rename(
163-
columns={
164-
"ID_500": "ID",
165-
"x_500": "x",
166-
"y_500": "y",
167-
"NP_500": "NP",
168-
"loc_NP_500": "loc_NP",
169-
"dist_NP_500": "dist_NP",
170-
},
171-
inplace=True,
172-
)
173-
profile_villages = profile_villages.drop(
174-
[
175-
"ID_2000",
176-
"x_2000",
177-
"y_2000",
178-
"NP_2000",
179-
"loc_NP_2000",
180-
"dist_NP_2000",
181-
],
182-
axis=1,
183-
)
184-
# Change nan values to NULL string
185-
# https://stackoverflow.com/a/26838140/12258568
186-
profile_villages = profile_villages.replace(np.nan,
187-
"NULL",
188-
regex=True)
189-
# Retrieves the directory the dataset is in and joins it the output
190-
# filename
191-
_, _, output_urban_profiles = format_dataset_output(
192-
dataset=export, name="urban_profiles", ext=".xlsx"
193-
)
194-
profile_villages.to_excel(output_urban_profiles)
195-
print("Jesus was black.")
196-
197-
198-
def main():
199-
"""
200-
Function to manage the CLI
201-
202-
:return: Nothing
203-
:rtype: None
204-
"""
205-
# Clean the terminal everytime a command is triggered
206-
os.system(shlex_quote("cls" if os.name == "nt" else "clear"))
207-
208-
class ArgumentParser(argparse.ArgumentParser):
209-
"""Object for parsing command line strings into Python objects.
210-
Overridden to print the help whenever an error occurred (For example,
211-
no arguments error)
212-
213-
Keyword Arguments:
214-
- prog -- The name of the program (default: sys.argv[0])
215-
- usage -- A usage message (default: auto-generated from arguments)
216-
- description -- A description of what the program does
217-
- epilog -- Text following the argument descriptions
218-
- parents -- Parsers whose arguments should be copied into this one
219-
- formatter_class -- HelpFormatter class for printing help messages
220-
- prefix_chars -- Characters that prefix optional arguments
221-
- fromfile_prefix_chars -- Characters that prefix files containing
222-
additional arguments
223-
- argument_default -- The default value for all arguments
224-
- conflict_handler -- String indicating how to handle conflicts
225-
- add_help -- Add a -h/-help option
226-
- allow_abbrev -- Allow long options to be abbreviated
227-
unambiguously
228-
- exit_on_error -- Determines whether or not ArgumentParser exits
229-
with error info when an error occurs
230-
"""
231-
232-
def error(self, message):
233-
"""
234-
235-
:param message:
236-
"""
237-
stderr.write("error: %s\n" % message)
238-
self.print_help()
239-
exit(2)
240-
241-
def file_path(dirpath: AnyStr) -> AnyStr:
242-
"""
243-
Returns a path only if it is a valid one
244-
245-
:param dirpath:
246-
:type dirpath AnyStr
247-
:return: normalized_filepath
248-
:rtype normalized_filepath AnyStr
249-
"""
250-
normalized_filepath = os.path.normpath(dirpath)
251-
if os.path.isfile(normalized_filepath):
252-
return normalized_filepath
253-
else:
254-
raise ArgumentTypeError(f'"{dirpath}" is not a valid path \n')
255-
256-
parser = ArgumentParser(
257-
prog="$ python INPMT",
258-
description="",
259-
add_help=False,
260-
epilog="\n",
261-
)
262-
263-
# Create the arguments
264-
parser.add_argument(
265-
"-h", "--help", action="help", help="Show this help message and exit."
266-
)
267-
parser.add_argument(
268-
"-d",
269-
"--description",
270-
dest="description",
271-
action="store_true",
272-
help="Show the program's description and exit.",
273-
)
274-
parser.add_argument(
275-
"-l",
276-
"--license",
277-
dest="license",
278-
action="store_true",
279-
help="Show the program's license and exit.",
280-
)
281-
parser.add_argument(
282-
"-c", "--config", nargs="*", help="Read or overwrite local config file"
283-
)
284-
parser.add_argument(
285-
"-m",
286-
"--method",
287-
nargs="?",
288-
type=file_path,
289-
help="How do you want to process the data ['countries', 'villages'].",
290-
)
291-
parser.add_argument(
292-
"-e",
293-
"--export",
294-
nargs="?",
295-
type=file_path,
296-
help="Where do you the result to be saved.",
297-
)
298-
# If no arguments are given, print the help
299-
if len(argv) == 1:
300-
parser.print_help(stderr)
301-
exit(1)
302-
303-
# Parse the arguments
304-
args = parser.parse_args()
305-
306-
# Based on the dest vars execute methods with the arguments
307-
try:
308-
if args.license is True:
309-
print('GPL-3.0')
310-
elif args.description is True:
311-
print('Impact of National Parks on Malaria Transmission')
312-
elif args.config is not None:
313-
if len(args.config) == 2:
314-
var, value = args.config
315-
set_cfg_val(var, value)
316-
with open('INPMT/config.cfg') as cfg:
317-
print(cfg.read())
318-
elif len(args.config) == 0:
319-
with open('INPMT/config.cfg') as cfg:
320-
print(cfg.read())
321-
elif args.method:
322-
run(method=args.method, export_dir=args.export)
323-
except AttributeError:
324-
parser.print_help(stderr)
325-
exit(1)
326-
327-
328-
# Execute outside the if __name__ == '__main__' because I the main function to
329-
# be accessible from the entry point in
330-
# setup.py
331-
if __name__ == "__main__":
332-
main()
333-
else:
334-
pass
45+
# Convert all raster data as xarray DataArrays
46+
population = rxr.open_rasterio(os.path.join(datasets, "POPULATION_AFRICA_100m_reprj3857.tif"))
47+
landuse = rxr.open_rasterio(os.path.join(datasets, "LANDUSE_ESACCI-LC-L4-LC10-Map-300m-P1Y-2016-v1.0.tif"))
48+
ndvi = rxr.open_rasterio(os.path.join(datasets, "NDVI_MOD13A1.006__500m_16_days_NDVI_doy2016_aid0001.tif"))
49+
swi = rxr.open_rasterio(os.path.join(datasets, "SWI_c_gls_SWI10_QL_2016_AFRICA_ASCAT_V3.1.1_reprj3857.tif"))
50+
gws = rxr.open_rasterio(os.path.join(datasets, "GWS_seasonality_AFRICA_reprj3857.tif"))
51+
prevalence = rxr.open_rasterio(os.path.join(datasets, "PREVALENCE_2019_Global_PfPR_2016_reprj3857.tif"))
52+
53+
# Add name attribute
54+
population.name = 'population'
55+
landuse.name = 'landuse'
56+
ndvi.name = 'ndvi'
57+
swi.name = 'swi'
58+
gws.name = 'gws'
59+
prevalence.name = 'prevalence'
60+
61+
# Convert all vector data as a WKT geometry
62+
os.path.join(datasets, "IRISH_countries.shp")
63+
os.path.join(datasets, "LANDUSE_ESACCI-LC-L4-LC10-Map-300m-P1Y-2016-v1.0.shp")
64+
anopheles_kyalo_in_PAs_buffers = os.path.join(datasets, "KYALO_anopheles_in_PAs_buffers.shp") # noqa F841
65+
national_parks_with_anopheles_kyalo = os.path.join(datasets, "NATIONAL_PARKS_WDPA_Africa_anopheles.shp")
66+
anopheles_kyalo = os.path.join(datasets, "KYALO_anopheles.shp")
67+
68+
profile_villages = get_urban_profile(
69+
datasets=datasets,
70+
villages=anopheles_kyalo,
71+
parks=national_parks_with_anopheles_kyalo,
72+
population=population,
73+
landuse=landuse,
74+
ndvi=ndvi,
75+
swi=swi,
76+
gws=gws,
77+
prevalence=prevalence)
78+
profile_villages.to_csv(os.path.join(datasets, 'profile_villages.csv'))
79+
print("Jesus was black.")
80+
return profile_villages

0 commit comments

Comments
 (0)