@@ -41,7 +41,6 @@
 import geopandas
 from shapely.geometry.multipolygon import MultiPolygon
 from shapely.geometry import Polygon
-from sklearn.cluster import KMeans
 import sliderule
 
 ###############################################################################
@@ -51,6 +50,14 @@
 # create logger
 logger = logging.getLogger(__name__)
 
+# import cluster support
+clustering_enabled = False
+try:
+    from sklearn.cluster import KMeans
+    clustering_enabled = True
+except:
+    logger.warning("Unable to import sklearn... clustering support disabled")
+
 # profiling times for each major function
 profiles = {}
 
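The guarded import above makes scikit-learn an optional dependency: the module still loads when sklearn is absent, and the clustering_enabled flag defers the failure to the first call that actually needs KMeans (see the toregion hunk below). A minimal, self-contained sketch of the same pattern, using the names from the diff; the require_clustering helper is hypothetical:

    import logging

    logger = logging.getLogger(__name__)

    # optional dependency guard: record availability instead of failing at import
    clustering_enabled = False
    try:
        from sklearn.cluster import KMeans
        clustering_enabled = True
    except ImportError:
        logger.warning("Unable to import sklearn... clustering support disabled")

    def require_clustering():
        # hypothetical check a caller would make before touching KMeans
        if not clustering_enabled:
            raise RuntimeError("clustering support not enabled; install scikit-learn")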
@@ -503,12 +510,8 @@ def __gdf2poly(gdf):
 #
 def __procoutputfile(parm, lon_key, lat_key):
     if "open_on_complete" in parm["output"] and parm["output"]["open_on_complete"]:
-        # Read Parquet File as DataFrame
-        df = geopandas.pd.read_parquet(parm["output"]["path"])
-        # Build GeoDataFrame
-        gdf = __todataframe(df, lon_key=lon_key, lat_key=lat_key)
-        # Return Results
-        return gdf
+        # Return GeoParquet File as GeoDataFrame
+        return geopandas.read_parquet(parm["output"]["path"])
     else:
         # Return Parquet Filename
         return parm["output"]["path"]
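geopandas.read_parquet reads a GeoParquet file directly into a GeoDataFrame, restoring the geometry column and CRS, so the intermediate pandas read and __todataframe rebuild are no longer needed. A hedged usage sketch; the output keys match the diff, while the file name is a placeholder:

    import geopandas

    # parm["output"] as consumed by __procoutputfile; the path is hypothetical
    parm = {"output": {"path": "grandmesa.parquet", "open_on_complete": True}}

    gdf = geopandas.read_parquet(parm["output"]["path"])
    print(gdf.crs, len(gdf))  # GeoDataFrame with geometry and CRS intact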
@@ -804,7 +807,7 @@ def atl06p(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, callb
     columns = {}
     elevation_records = []
     num_elevations = 0
-    field_dictionary = {} # ['field_name'] = {"extent_id": [], field_name: []}
+    field_dictionary = {} # [<field_name>] = {"extent_id": [], <field_name>: []}
     if len(rsps) > 0:
         # Sort Records
         for rsp in rsps:
@@ -814,15 +817,23 @@ def atl06p(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, callb
             elif 'extrec' == rsp['__rectype']:
                 field_name = parm['atl03_geo_fields'][rsp['field_index']]
                 if field_name not in field_dictionary:
-                    field_dictionary[field_name] = {"extent_id": [], field_name: []}
+                    field_dictionary[field_name] = {'extent_id': [], field_name: []}
                 # Parse Ancillary Data
-                data = __get_values(rsp['data'], rsp['data_type'], len(rsp['data']))
+                data = __get_values(rsp['data'], rsp['datatype'], len(rsp['data']))
                 # Add Left Pair Track Entry
                 field_dictionary[field_name]['extent_id'] += rsp['extent_id'] | 0x2,
                 field_dictionary[field_name][field_name] += data[LEFT_PAIR],
                 # Add Right Pair Track Entry
                 field_dictionary[field_name]['extent_id'] += rsp['extent_id'] | 0x3,
                 field_dictionary[field_name][field_name] += data[RIGHT_PAIR],
+            elif 'rsrec' == rsp['__rectype']:
+                for sample in rsp["samples"]:
+                    time_str = sliderule.gps2utc(sample["time"])
+                    field_name = parm['samples'][rsp['raster_index']] + "-" + time_str.split(" ")[0].strip()
+                    if field_name not in field_dictionary:
+                        field_dictionary[field_name] = {'extent_id': [], field_name: []}
+                    field_dictionary[field_name]['extent_id'] += rsp['extent_id'],
+                    field_dictionary[field_name][field_name] += sample['value'],
         # Build Elevation Columns
         if num_elevations > 0:
             # Initialize Columns
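Each raster sample record ('rsrec') becomes a column named after the sampled raster plus the UTC date of the sample, so one raster sampled on different days produces separate columns. A small sketch of the naming rule, with made-up values standing in for the record fields above:

    # hypothetical values mirroring the rsrec handling above
    raster_name = "arcticdem-mosaic"      # stands in for parm['samples'][rsp['raster_index']]
    time_str = "2021-01-01 00:00:00"      # stands in for sliderule.gps2utc(sample["time"])

    # one column per raster per sample date
    field_name = raster_name + "-" + time_str.split(" ")[0].strip()
    assert field_name == "arcticdem-mosaic-2021-01-01"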
@@ -967,23 +978,23 @@ def atl03sp(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, call
                 # Get Field Type
                 field_name = parm['atl03_geo_fields'][rsp['field_index']]
                 if field_name not in extent_field_types:
-                    extent_field_types[field_name] = sliderule.basictypes[sliderule.codedtype2str[rsp['data_type']]]["nptype"]
+                    extent_field_types[field_name] = sliderule.basictypes[sliderule.codedtype2str[rsp['datatype']]]["nptype"]
                 # Initialize Extent Dictionary Entry
                 if extent_id not in extent_dictionary:
                     extent_dictionary[extent_id] = {}
                 # Save Values per Extent ID per Field Name
-                data = __get_values(rsp['data'], rsp['data_type'], len(rsp['data']))
+                data = __get_values(rsp['data'], rsp['datatype'], len(rsp['data']))
                 extent_dictionary[extent_id][field_name] = data
             elif 'phrec' == rsp['__rectype']:
                 # Get Field Type
                 field_name = parm['atl03_ph_fields'][rsp['field_index']]
                 if field_name not in photon_field_types:
-                    photon_field_types[field_name] = sliderule.basictypes[sliderule.codedtype2str[rsp['data_type']]]["nptype"]
+                    photon_field_types[field_name] = sliderule.basictypes[sliderule.codedtype2str[rsp['datatype']]]["nptype"]
                 # Initialize Extent Dictionary Entry
                 if extent_id not in photon_dictionary:
                     photon_dictionary[extent_id] = {}
                 # Save Values per Extent ID per Field Name
-                data = __get_values(rsp['data'], rsp['data_type'], len(rsp['data']))
+                data = __get_values(rsp['data'], rsp['datatype'], len(rsp['data']))
                 photon_dictionary[extent_id][field_name] = data
         # Build Elevation Columns
         if num_photons > 0:
@@ -1331,22 +1342,25 @@ def toregion(source, tolerance=0.0, cellsize=0.01, n_clusters=1):
     # generate clusters
     clusters = []
     if n_clusters > 1:
-        # pull out centroids of each geometry object
-        if "CenLon" in gdf and "CenLat" in gdf:
-            X = numpy.column_stack((gdf["CenLon"], gdf["CenLat"]))
+        if clustering_enabled:
+            # pull out centroids of each geometry object
+            if "CenLon" in gdf and "CenLat" in gdf:
+                X = numpy.column_stack((gdf["CenLon"], gdf["CenLat"]))
+            else:
+                s = gdf.centroid
+                X = numpy.column_stack((s.x, s.y))
+            # run k means clustering algorithm against polygons in gdf
+            kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=5, max_iter=400)
+            y_kmeans = kmeans.fit_predict(X)
+            k = geopandas.pd.DataFrame(y_kmeans, columns=['cluster'])
+            gdf = gdf.join(k)
+            # build polygon for each cluster
+            for n in range(n_clusters):
+                c_gdf = gdf[gdf["cluster"] == n]
+                c_poly = __gdf2poly(c_gdf)
+                clusters.append(c_poly)
         else:
-            s = gdf.centroid
-            X = numpy.column_stack((s.x, s.y))
-        # run k means clustering algorithm against polygons in gdf
-        kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=5, max_iter=400)
-        y_kmeans = kmeans.fit_predict(X)
-        k = geopandas.pd.DataFrame(y_kmeans, columns=['cluster'])
-        gdf = gdf.join(k)
-        # build polygon for each cluster
-        for n in range(n_clusters):
-            c_gdf = gdf[gdf["cluster"] == n]
-            c_poly = __gdf2poly(c_gdf)
-            clusters.append(c_poly)
+            raise sliderule.FatalError("Clustering support not enabled; unable to import sklearn package")
 
     # update timing profiles
     profiles[toregion.__name__] = time.perf_counter() - tstart
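For callers, requesting n_clusters > 1 now raises sliderule.FatalError when scikit-learn is missing, instead of the whole module failing at import time. A hedged usage sketch: toregion's signature comes from the hunk header, the input file is a placeholder, and the "clusters" key of the returned dictionary is assumed from the clusters list built above:

    from sliderule import icesat2

    # placeholder input; any polygon file readable by geopandas should work
    region = icesat2.toregion("grandmesa.geojson", n_clusters=3)

    # with sklearn installed: one simplified polygon per k-means cluster
    for poly in region["clusters"]:   # "clusters" key assumed
        print(len(poly))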