
Commit bed9fcd

add s3 retrieve and granule plot
1 parent 2ee900d commit bed9fcd

File tree

1 file changed: +198 -6 lines changed

examples/cmr_debug_regions.ipynb

Lines changed: 198 additions & 6 deletions
@@ -20,18 +20,52 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from sliderule import icesat2, ipysliderule\n",
-"import numpy as np\n",
+"import re\n",
+"import time\n",
 "import logging\n",
+"import posixpath\n",
+"import numpy as np\n",
+"import geopandas as gpd\n",
+"import concurrent.futures\n",
 "import matplotlib.cm as cm\n",
 "import matplotlib.colors as colors\n",
+"import ipywidgets as widgets\n",
+"from shapely.geometry import LineString\n",
+"from sliderule import sliderule, icesat2, ipysliderule\n",
+"import sliderule.io\n",
 "# autoreload\n",
 "%load_ext autoreload\n",
 "%autoreload 2\n",
 "# create logger\n",
 "logging.basicConfig(level=logging.INFO)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"#### Set ICESat-2 Product"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Configure ICESat-2 API\n",
+"icesat2.init(\"icesat2sliderule.org\", loglevel=logging.WARNING)\n",
+"icesat2.get_version()\n",
+"\n",
+"# dropdown menu for ICESat-2 product\n",
+"product_select = widgets.Dropdown(\n",
+" options=['ATL03','ATL06','ATL08'],\n",
+" description='Product:',\n",
+" disabled=False\n",
+")\n",
+"display(product_select)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -68,7 +102,9 @@
 "granule_polygons = []\n",
 "for poly in m.regions:\n",
 " # polygon from map\n",
-" resources,polygons = icesat2.cmr(polygon=poly, return_polygons=True)\n",
+" resources,polygons = icesat2.cmr(polygon=poly,\n",
+" short_name=product_select.value,\n",
+" return_polygons=True)\n",
 " granule_list.extend(resources)\n",
 " granule_polygons.extend(polygons)\n",
 "# print list of granules\n",
@@ -77,6 +113,21 @@
 "logging.debug(granule_list)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"granule_select = widgets.SelectMultiple(\n",
+" options=granule_list,\n",
+" description='Granules:',\n",
+" layout=widgets.Layout(width='35%', height='200px'),\n",
+" disabled=False\n",
+")\n",
+"display(granule_select)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -90,9 +141,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"cmap = iter(cm.viridis(np.linspace(0,1,num_granules)))\n",
-"for region in granule_polygons:\n",
-" locations = [(p['lat'],p['lon']) for p in region]\n",
+"granule_indices = list(granule_select.index)\n",
+"cmap = iter(cm.viridis(np.linspace(0,1,len(granule_indices))))\n",
+"for g in granule_indices:\n",
+" locations = [(p['lat'],p['lon']) for p in granule_polygons[g]]\n",
 " color = colors.to_hex(next(cmap))\n",
 " polygon = ipysliderule.ipyleaflet.Polygon(\n",
 " locations=locations,\n",
@@ -103,6 +155,146 @@
 " )\n",
 " m.map.add_layer(polygon)"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"def s3_retrieve(granule, **kwargs):\n",
+" # set default keyword arguments\n",
+" kwargs.setdefault('asset','nsidc-s3')\n",
+" kwargs.setdefault('lon_key','reference_photon_lon')\n",
+" kwargs.setdefault('lat_key','reference_photon_lat')\n",
+" kwargs.setdefault('index_key','time')\n",
+" kwargs.setdefault('polygon',None)\n",
+" # regular expression operator for extracting information from files\n",
+" rx = re.compile(r'(ATL\\d{2})(-\\d{2})?_(\\d{4})(\\d{2})(\\d{2})(\\d{2})'\n",
+" r'(\\d{2})(\\d{2})_(\\d{4})(\\d{2})(\\d{2})_(\\d{3})_(\\d{2})(.*?).h5$')\n",
+" # extract parameters from ICESat-2 granule\n",
+" PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRN,RL,VRS,AUX=rx.findall(granule).pop()\n",
+" # variables of interest\n",
+" if (PRD == 'ATL03'):\n",
+" segment_group = \"geolocation\"\n",
+" segment_key = 'segment_id'\n",
+" vnames = ['segment_id','delta_time','reference_photon_lat',\n",
+" 'reference_photon_lon']\n",
+" elif (PRD == 'ATL06'):\n",
+" segment_group = \"land_ice_segments\"\n",
+" segment_key = 'segment_id'\n",
+" vnames = ['segment_id','delta_time','latitude','longitude']\n",
+" elif (PRD == 'ATL08'):\n",
+" segment_group = \"land_segments\"\n",
+" segment_key = 'segment_id_beg'\n",
+" vnames = ['segment_id_beg','segment_id_end','delta_time',\n",
+" 'latitude','longitude']\n",
+" # for each valid beam within the HDF5 file\n",
+" frames = []\n",
+" gt = dict(gt1l=10,gt1r=20,gt2l=30,gt2r=40,gt3l=50,gt3r=60)\n",
+" atlas_sdp_epoch = np.datetime64('2018-01-01T00:00:00')\n",
+" kwds = dict(startrow=0,numrows=-1)\n",
+" for gtx in ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']:\n",
+" geodatasets = [dict(dataset=f'{gtx}/{segment_group}/{v}',**kwds) for v in vnames]\n",
+" try:\n",
+" # get datasets from s3\n",
+" hidatasets = icesat2.h5p(geodatasets, granule, kwargs['asset'])\n",
+" # copy to new \"flattened\" dictionary\n",
+" data = {posixpath.basename(key):var for key,var in hidatasets.items()}\n",
+" # Generate Time Column\n",
+" delta_time = (data['delta_time']*1e9).astype('timedelta64[ns]')\n",
+" data['time'] = gpd.pd.to_datetime(atlas_sdp_epoch + delta_time)\n",
+" except:\n",
+" pass\n",
+" else:\n",
+" # copy filename parameters\n",
+" data['rgt'] = [int(TRK)]*len(data['delta_time'])\n",
+" data['cycle'] = [int(CYCL)]*len(data['delta_time'])\n",
+" data['gt'] = [gt[gtx]]*len(data['delta_time'])\n",
+" # pandas dataframe from compiled dictionary\n",
+" frames.append(gpd.pd.DataFrame.from_dict(data))\n",
+" # concatenate pandas dataframe\n",
+" try:\n",
+" df = gpd.pd.concat(frames)\n",
+" except:\n",
+" return sliderule.icesat2.__emptyframe()\n",
+" # convert to a GeoDataFrame\n",
+" lon_key,lat_key = (kwargs['lon_key'],kwargs['lat_key'])\n",
+" geometry = gpd.points_from_xy(df[lon_key], df[lat_key])\n",
+" gdf = gpd.GeoDataFrame(df.drop(columns=[lon_key,lat_key]),\n",
+" geometry=geometry,crs='EPSG:4326')\n",
+" # create global track variable\n",
+" track = 6*(gdf['rgt']-1) + (gdf['gt']/10)\n",
+" gdf = gdf.assign(track=track)\n",
+" # calculate global reference point\n",
+" global_ref_pt = 6*1387*gdf[segment_key] + track\n",
+" gdf = gdf.assign(global_ref_pt=global_ref_pt)\n",
+" # sort values for reproducible output despite async processing\n",
+" gdf.set_index(kwargs['index_key'], inplace=True)\n",
+" gdf.sort_index(inplace=True)\n",
+" # remove duplicate points\n",
+" gdf = gdf[~gdf.index.duplicated()]\n",
+" # intersect with geometry in projected reference system\n",
+" if kwargs['polygon'] is not None:\n",
+" gdf = gpd.overlay(gdf.to_crs(kwargs['polygon'].crs),\n",
+" kwargs['polygon'], how='intersection')\n",
+" # convert back to original coordinate reference system\n",
+" return gdf.to_crs('EPSG:4326')"
+]
+},
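
s3_retrieve parses everything it needs from the granule filename before touching S3: product, acquisition date, reference ground track, cycle, and release all come out of the regular expression. A sketch of that extraction on a hypothetical ATL06 filename:

import re

# same filename pattern as s3_retrieve above
rx = re.compile(r'(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})(\d{2})'
                r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$')
# hypothetical granule name for illustration
granule = 'ATL06_20181017222812_02950102_005_01.h5'
PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRN,RL,VRS,AUX = rx.findall(granule).pop()
print(PRD, TRK, CYCL)  # ATL06 0295 01
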
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# granule resources for selected segments\n",
+"perf_start = time.perf_counter()\n",
+"gdf = sliderule.icesat2.__emptyframe()\n",
+"num_servers = sliderule.update_available_servers()\n",
+"max_workers = num_servers * icesat2.SERVER_SCALE_FACTOR\n",
+"with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
+" futures = [executor.submit(s3_retrieve, granule_list[g]) for g in granule_indices]\n",
+" # Wait for Results\n",
+" for future in concurrent.futures.as_completed(futures):\n",
+" # append to dataframe\n",
+" gdf = gdf.append(future.result())\n",
+"\n",
+"# Display Statistics\n",
+"perf_stop = time.perf_counter()\n",
+"perf_duration = perf_stop - perf_start\n",
+"print(\"Completed in {:.3f} seconds of wall-clock time\".format(perf_duration))\n",
+"print(\"Reference Ground Tracks: {}\".format(gdf[\"rgt\"].unique()))\n",
+"print(\"Cycles: {}\".format(gdf[\"cycle\"].unique()))\n",
+"print(\"Received {} elevations\".format(gdf.shape[0]))"
+]
+},
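
Each selected granule is fetched on its own worker thread and folded into a single GeoDataFrame as it completes. Note that DataFrame.append, used above, was later removed in pandas 2.0; an equivalent collection pattern with pd.concat, assuming s3_retrieve, granule_list, and granule_indices from the cells above:

import concurrent.futures
import geopandas as gpd

frames = []
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(s3_retrieve, granule_list[g]) for g in granule_indices]
    for future in concurrent.futures.as_completed(futures):
        # collect each finished granule, then concatenate once at the end
        frames.append(future.result())
gdf = gpd.pd.concat(frames)
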
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# fix int columns that were converted to objects\n",
+"fixed = gdf.drop(columns=['geometry'])\n",
+"for column in fixed.select_dtypes(include='object').columns:\n",
+" fixed[column] = fixed[column].astype(\"int32\")\n",
+"fixed = gpd.GeoDataFrame(fixed,geometry=gdf.geometry,crs='EPSG:4326')"
+]
+},
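
After the append loop, integer columns can come back with object dtype, so the cell above casts them back. The cast in isolation on a toy frame:

import pandas as pd

# toy frame where integer columns arrived as Python objects
df = pd.DataFrame({'rgt': pd.Series([295, 295], dtype='object'),
                   'cycle': pd.Series([1, 2], dtype='object')})
for column in df.select_dtypes(include='object').columns:
    df[column] = df[column].astype('int32')
print(df.dtypes)  # rgt and cycle are now int32
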
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# convert from points to linestrings grouping by track\n",
+"grouped = fixed.groupby(['track'])['geometry'].apply(\n",
+" lambda x: LineString(x.tolist()) if x.size > 1 else x.tolist())\n",
+"geodata = ipysliderule.ipyleaflet.GeoData(geo_dataframe=gpd.GeoDataFrame(grouped),\n",
+" style={'color': 'black', 'opacity':1, 'weight':0.1})\n",
+"m.map.add_layer(geodata)"
+]
 }
 ],
 "metadata": {
