Skip to content

Demonstrate use of GCS auth helpers #1574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 36 additions & 51 deletions gtfs_digest/_transit_routes_on_shn.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,24 @@
from functools import cache

import geopandas as gpd
import numpy as np
import pandas as pd
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis import geography_utils
from shared_utils import publish_utils

import google.auth
credentials, project = google.auth.default()
import gcsfs
fs = gcsfs.GCSFileSystem()

from calitp_data_analysis import geography_utils, utils
# from segment_speed_utils import gtfs_schedule_wrangling, helpers
from shared_utils import (
#catalog_utils,
#dask_utils,
#gtfs_utils_v2,
#portfolio_utils,
publish_utils,
#rt_dates,
#rt_utils,
)
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS

GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/state_highway_network/"

"""
Functions
"""

@cache
def gcs_geopandas():
    """Build (once) and return a shared GCSGeoPandas client.

    The functools.cache decorator memoizes the first call, so every
    caller in this module reuses a single lazily-constructed instance
    instead of re-creating the GCS filesystem wrapper each time.
    """
    client = GCSGeoPandas()
    return client

def process_transit_routes() -> gpd.GeoDataFrame:
"""
Select the most recent transit route to
Expand All @@ -41,10 +35,8 @@ def process_transit_routes() -> gpd.GeoDataFrame:
"recent_combined_name",
# "route_id",
]
op_geography_df = gpd.read_parquet(
f"{RT_SCHED_GCS}{OPERATOR_ROUTE}.parquet",
storage_options={"token": credentials.token},
)[subset]

op_geography_df = gcs_geopandas().read_parquet(f"{RT_SCHED_GCS}{OPERATOR_ROUTE}.parquet")[subset]

# Keep the row for each portfolio_organization_name/recent_combined_name
# that is the most recent.
Expand Down Expand Up @@ -80,10 +72,7 @@ def dissolve_shn(columns_to_dissolve: list, file_name: str) -> gpd.GeoDataFrame:
"shared_data_catalog"
).state_highway_network.urlpath

shn = gpd.read_parquet(
SHN_FILE,
storage_options={"token": credentials.token},
).to_crs(geography_utils.CA_NAD83Albers_ft)
shn = gcs_geopandas().read_parquet(SHN_FILE).to_crs(geography_utils.CA_NAD83Albers_ft)

    # Dissolve by route which represents the route's name and drop the other columns
# because they are no longer relevant.
Expand All @@ -102,10 +91,11 @@ def dissolve_shn(columns_to_dissolve: list, file_name: str) -> gpd.GeoDataFrame:
)

# Save this out so I don't have to dissolve it each time.
shn_dissolved.to_parquet(
f"gs://calitp-analytics-data/data-analyses/state_highway_network/shn_dissolved_by_{file_name}.parquet",
filesystem=fs,
shn_dissolved = gcs_geopandas().geo_data_frame_to_parquet(
shn_dissolved,
f"gs://calitp-analytics-data/data-analyses/state_highway_network/shn_dissolved_by_{file_name}.parquet"
)

return shn_dissolved

def buffer_shn(buffer_amount: int, file_name: str) -> gpd.GeoDataFrame:
Expand All @@ -114,10 +104,7 @@ def buffer_shn(buffer_amount: int, file_name: str) -> gpd.GeoDataFrame:
transit routes.
"""
# Read in the dissolved SHN file
shn_df = gpd.read_parquet(
f"{GCS_FILE_PATH}shn_dissolved_by_{file_name}.parquet",
storage_options={"token": credentials.token},
)
shn_df = gcs_geopandas().read_parquet(f"{GCS_FILE_PATH}shn_dissolved_by_{file_name}.parquet")

# Buffer the state highway.
shn_df_buffered = shn_df.assign(
Expand All @@ -126,9 +113,9 @@ def buffer_shn(buffer_amount: int, file_name: str) -> gpd.GeoDataFrame:

# Save it out so we won't have to buffer over again and
# can just read it in.
shn_df_buffered.to_parquet(
f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_{file_name}.parquet",
filesystem=fs,
shn_df_buffered = gcs_geopandas().geo_data_frame_to_parquet(
shn_df_buffered,
f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_{file_name}.parquet"
)

return shn_df_buffered
Expand All @@ -143,10 +130,8 @@ def routes_shn_intersection(buffer_amount: int, file_name: str) -> gpd.GeoDataFr
    # Read in the buffered SHN here, or re-buffer if we don't have it available.
HWY_FILE = f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_{file_name}.parquet"

if fs.exists(HWY_FILE):
shn_routes_gdf = gpd.read_parquet(
HWY_FILE, storage_options={"token": credentials.token}
)
if gcs_geopandas().gcs_filesystem.exists(HWY_FILE):
shn_routes_gdf = gcs_geopandas().read_parquet(HWY_FILE)
else:
shn_routes_gdf = buffer_shn(buffer_amount)

Expand Down Expand Up @@ -243,16 +228,17 @@ def dissolve_buffered_for_map(buffer_amount: str) -> gpd.GeoDataFrame:
# Read in buffered shn here
HWY_FILE = (
f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_ct_district_route.parquet"

)
gdf = gpd.read_parquet(HWY_FILE, storage_options={"token": credentials.token})
gdf = gcs_geopandas().read_parquet(HWY_FILE)

# Dissolve by district
gdf2 = gdf.dissolve("District").reset_index()[["geometry", "District", "shn_route"]]

# Save
gdf2.to_parquet(
f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_gtfs_digest.parquet",
filesystem=fs,
gcs_geopandas().geo_data_frame_to_parquet(
gdf2,
f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_gtfs_digest.parquet"
)

def final_transit_route_shs_outputs(
Expand Down Expand Up @@ -342,13 +328,12 @@ def final_transit_route_shs_outputs(
open_data_portal_df = prep_open_data_portal(intersection_gdf)

# Save everything out for now
intersection_gdf.to_parquet(
f"{GCS_FILE_PATH}transit_route_intersect_shn_{SHN_HWY_BUFFER_FEET}_gtfs_digest.parquet",
filesystem=fs,
gcs_geopandas().geo_data_frame_to_parquet(
intersection_gdf,
f"{GCS_FILE_PATH}transit_route_intersect_shn_{SHN_HWY_BUFFER_FEET}_gtfs_digest.parquet"
)
open_data_portal_df.to_parquet(
f"{GCS_FILE_PATH}transit_route_shn_open_data_portal_{SHN_HWY_BUFFER_FEET}.parquet",
filesystem=fs,

gcs_geopandas().geo_data_frame_to_parquet(
open_data_portal_df,
f"{GCS_FILE_PATH}transit_route_shn_open_data_portal_{SHN_HWY_BUFFER_FEET}.parquet"
)

68 changes: 21 additions & 47 deletions ntd/sjcog_service_analysis/explore_sjcog_service.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,34 +32,19 @@
},
"outputs": [],
"source": [
"from functools import cache\n",
"\n",
"import altair as alt\n",
"import pandas as pd\n",
"from calitp_data_analysis.gcs_geopandas import GCSGeoPandas\n",
"from calitp_data_analysis.tables import tbls\n",
"from siuba import _, collect, count, filter, group_by, select, show_query, summarize\n",
"\n",
"# from update_vars import GCS_FILE_PATH, NTD_MODES, NTD_TOS\n",
"from siuba import _, collect, filter, group_by, summarize\n",
"import geopandas as gpd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)\n",
"pd.set_option(\"display.max_rows\", None)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1df06b6e-4f00-4fbd-8171-64f1e1a35dae",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import gcsfs\n",
"import google.auth\n",
"\n",
"credentials, project = google.auth.default()\n",
"fs = gcsfs.GCSFileSystem(token=credentials)"
]
},
{
"cell_type": "code",
"execution_count": 3,
Expand Down Expand Up @@ -630,25 +615,11 @@
},
"outputs": [],
"source": [
"# tsi_data = gpd.read_file(\n",
"# tsi_url,\n",
"# storage_options={\"token\": credentials.token}\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c8215a6b-c882-4aef-9717-99fa82178539",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from calitp_data_analysis import *\n",
"fs = get_fs()\n",
"with fs.open(tsi_url) as f:\n",
" tsi_data=gpd.read_file(f)"
"@cache\n",
"def gcs_geopandas():\n",
" return GCSGeoPandas()\n",
"\n",
"tsi_data = gcs_geopandas().read_file(tsi_url)"
]
},
{
Expand Down Expand Up @@ -696,10 +667,7 @@
"# CA Counties\n",
"county_url=\"https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHboundary/County_Boundaries/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson\"\n",
"\n",
"county_map=gpd.read_file(\n",
" county_url,\n",
" storage_options={\"token\": credentials.token}\n",
")"
"county_map=gpd.read_file(county_url)"
]
},
{
Expand Down Expand Up @@ -747,10 +715,8 @@
"source": [
"# UZA map\n",
"uza_url=\"https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHboundary/Adjusted_Urban_Area/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson\"\n",
"uza_map=gpd.read_file(\n",
" uza_url,\n",
" storage_options={\"token\": credentials.token}\n",
")"
"\n",
"uza_map=gpd.read_file(uza_url)"
]
},
{
Expand Down Expand Up @@ -7504,6 +7470,14 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "e1ab6437924be9b1"
}
],
"metadata": {
Expand Down