Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import os

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import shape
# Census API key. Prefer the CENSUS_API_KEY environment variable; the
# hard-coded fallback is kept only for backward compatibility — secrets
# should not live in source control, so this key ought to be rotated.
API_KEY = os.environ.get("CENSUS_API_KEY", "ea9637fd9f0c41f3e2e932faa99dfcd76f8041aa")

# Two-letter USPS state abbreviation -> 2-digit state FIPS code
# (50 states + DC = 51 entries).
state_abbrev_to_fips = {
    "AL": "01", "AK": "02", "AZ": "04", "AR": "05", "CA": "06", "CO": "08",
    "CT": "09", "DE": "10", "DC": "11", "FL": "12", "GA": "13", "HI": "15",
    "ID": "16", "IL": "17", "IN": "18", "IA": "19", "KS": "20", "KY": "21",
    "LA": "22", "ME": "23", "MD": "24", "MA": "25", "MI": "26", "MN": "27",
    "MS": "28", "MO": "29", "MT": "30", "NE": "31", "NV": "32", "NH": "33",
    "NJ": "34", "NM": "35", "NY": "36", "NC": "37", "ND": "38", "OH": "39",
    "OK": "40", "OR": "41", "PA": "42", "RI": "44", "SC": "45", "SD": "46",
    "TN": "47", "TX": "48", "UT": "49", "VT": "50", "VA": "51", "WA": "53",
    "WV": "54", "WI": "55", "WY": "56"
}

# Inverse lookup: 2-digit FIPS code -> USPS abbreviation.
fips_to_abbrev = {v: k for k, v in state_abbrev_to_fips.items()}
def fetch_state_data():
    """Fetch state-level 2019 PEP population data from the Census API.

    Returns:
        pd.DataFrame: Columns NAME, POP (int), and state (FIPS code string),
        one row per state.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    state_url = (
        "https://api.census.gov/data/2019/pep/population"
        f"?get=NAME,POP&for=state:*&key={API_KEY}"
    )
    # Fail fast on network stalls / HTTP errors instead of hanging forever
    # or producing a confusing JSON-decode failure downstream.
    r_states = requests.get(state_url, timeout=30)
    r_states.raise_for_status()
    state_data = r_states.json()
    # The first row of the Census JSON payload is the column header.
    df = pd.DataFrame(state_data[1:], columns=state_data[0])
    df["POP"] = df["POP"].astype(int)
    return df
def fetch_county_data():
    """Fetch county-level 2019 PEP population data from the Census API.

    Returns:
        pd.DataFrame: Columns NAME, POP (int), state, county, plus derived
        countyName/stateName (NAME split on the first comma) and a 5-digit
        FIPS key for joining to county geometries.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    county_url = (
        "https://api.census.gov/data/2019/pep/population"
        f"?get=NAME,POP&for=county:*&key={API_KEY}"
    )
    # Fail fast on network stalls / HTTP errors instead of hanging forever
    # or producing a confusing JSON-decode failure downstream.
    r_counties = requests.get(county_url, timeout=30)
    r_counties.raise_for_status()
    county_data = r_counties.json()
    # The first row of the Census JSON payload is the column header.
    df = pd.DataFrame(county_data[1:], columns=county_data[0])
    df["POP"] = df["POP"].astype(int)
    # NOTE: the split does not strip whitespace, so stateName may carry a
    # leading space — kept as-is for compatibility with downstream consumers.
    df[['countyName', 'stateName']] = df['NAME'].str.split(',', expand=True)
    # 2-digit state code + 3-digit county code = 5-digit county FIPS key.
    df["FIPS"] = df["state"].str.zfill(2) + df["county"].str.zfill(3)
    return df
def fetch_geojson(url: str):
    """Fetch and parse GeoJSON from the given URL.

    Args:
        url: Address of a GeoJSON document.

    Returns:
        The parsed JSON payload (dict).

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    # Geometry files can be large, so allow a generous but bounded timeout.
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    return resp.json()
def load_epa_data():
    """Load EPA region assignments from ``epa_regions.csv``.

    Only the 'Region' and 'States' columns are read, which prevents stray
    trailing commas in the CSV from introducing extra columns.

    Returns:
        pd.DataFrame: Columns Region and States (upper-cased, trimmed
        state abbreviations suitable for joining).
    """
    epa = pd.read_csv("epa_regions.csv", usecols=["Region", "States"])
    # Normalize the join key: trim whitespace, then force uppercase.
    epa["States"] = epa["States"].str.strip().str.upper()
    return epa
def build_states_gdf(state_df, state_abbrev_to_fips):
    """
    Build a GeoDataFrame for US states with Census population and EPA
    region data attached.
    """
    pop_by_fips = state_df.set_index("state")["POP"].to_dict()
    name_by_fips = state_df.set_index("state")["NAME"].to_dict()

    url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json"
    geo_data = fetch_geojson(url)

    records = []
    for feature in geo_data["features"]:
        # Feature ids are state abbreviations; normalize case first.
        fips = state_abbrev_to_fips.get(feature["id"].upper())
        if not fips:
            continue  # skip features not in the 50-state + DC mapping
        records.append({
            "geometry": shape(feature["geometry"]),
            "STATE_FIPS": fips,
            "NAME": name_by_fips.get(fips, "No data"),
            "POP": pop_by_fips.get(fips, "No data"),
        })

    gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")
    # Pre-format population for tooltips; a non-int means the lookup failed.
    gdf["POP_TT"] = gdf["POP"].apply(
        lambda v: f"{int(v):,}" if isinstance(v, int) else "No data"
    )
    # Abbreviation column used as the key for the EPA-region join.
    gdf["STATE_ABBR"] = gdf["STATE_FIPS"].map(fips_to_abbrev)

    gdf = gdf.merge(load_epa_data(), left_on="STATE_ABBR", right_on="States", how="left")
    gdf = gdf.rename(columns={"Region": "EPA_REGION"})
    gdf["EPA_REGION"] = pd.to_numeric(gdf["EPA_REGION"], errors="coerce")
    return gdf
def build_counties_gdf(county_df):
    """
    Build a GeoDataFrame for US counties, simplify geometries for faster
    rendering, and merge EPA region data.
    """
    pop_by_fips = county_df.set_index("FIPS")["POP"].to_dict()
    name_by_fips = county_df.set_index("FIPS")["NAME"].to_dict()

    url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    geo_data = fetch_geojson(url)

    records = []
    for feature in geo_data["features"]:
        # Feature ids are 5-digit county FIPS codes.
        fips = feature["id"]
        records.append({
            "geometry": shape(feature["geometry"]),
            "FIPS": fips,
            "NAME": name_by_fips.get(fips, "No data"),
            "POP": pop_by_fips.get(fips, "No data"),
        })

    gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")
    # Lighten the geometries; topology preservation avoids broken rings.
    gdf["geometry"] = gdf["geometry"].simplify(tolerance=0.05, preserve_topology=True)

    # The first two FIPS digits identify the state; map to its abbreviation
    # so we can join on the EPA region table.
    gdf["STATE_ABBR"] = gdf["FIPS"].str[:2].map(fips_to_abbrev)

    gdf = gdf.merge(load_epa_data(), left_on="STATE_ABBR", right_on="States", how="left")
    gdf = gdf.rename(columns={"Region": "EPA_REGION"})
    gdf["EPA_REGION"] = pd.to_numeric(gdf["EPA_REGION"], errors="coerce")
    return gdf
def load_and_merge_caps(states_gdf):
    """
    Attach state-level climate action plan (CAP) counts and plan details
    to the states GeoDataFrame, precomputing display columns.
    """
    caps = pd.read_csv("caps_plans.csv")
    caps["State"] = caps["State"].str.strip().str.upper()
    caps["STATE_FIPS"] = caps["State"].map(state_abbrev_to_fips)
    caps["plan_info"] = caps.apply(
        lambda row: f"{row['City']}, {row['Year']}, {row['Plan Type']}", axis=1
    )

    per_state = (
        caps.groupby("STATE_FIPS")
        .agg(n_caps=("Plan Type", "count"), plan_list=("plan_info", list))
        .reset_index()
    )

    merged = states_gdf.merge(per_state, on="STATE_FIPS", how="left")
    # States with no plans get 0 / an empty list rather than NaN.
    merged["n_caps"] = merged["n_caps"].fillna(0).astype(int)
    merged["plan_list"] = merged["plan_list"].apply(
        lambda v: v if isinstance(v, list) else []
    )
    # Tooltip population column, in case the upstream builder didn't add it.
    if "POP_TT" not in merged.columns:
        merged["POP_TT"] = merged["POP"].apply(
            lambda v: f"{int(v):,}" if isinstance(v, int) else "No data"
        )
    return merged
def _county_key(value):
    """Normalize a county label for joining: uppercase, drop any ', State'
    suffix and a trailing ' COUNTY'. Returns None for null inputs."""
    if pd.isnull(value):
        return None
    return value.upper().split(',')[0].replace(" COUNTY", "").strip()


def load_and_merge_caps_county(counties_gdf):
    """
    Merge county-level climate action plan data with the counties
    GeoDataFrame and precompute display columns.

    Note: adds STATE and CountyKey columns to *counties_gdf* in place
    (behavior preserved from the original implementation).
    """
    caps_df = pd.read_csv("caps_plans.csv")
    mapping_df = pd.read_csv("city_county_mapping.csv")
    caps_df["State"] = caps_df["State"].str.strip().str.upper()

    # City/State pairs resolve each plan to a county via the mapping file.
    # (A redundant CountyKey precomputation on mapping_df was removed: it
    # was unconditionally overwritten below and, lacking a null guard,
    # crashed on missing County values.)
    merged_caps = pd.merge(caps_df, mapping_df, on=["City", "State"], how="left")
    merged_caps["plan_info"] = merged_caps.apply(
        lambda row: f"{row['City']}, {row['Year']}, {row['Plan Type']}", axis=1
    )
    merged_caps["CountyKey"] = merged_caps["County"].apply(_county_key)

    grouped = merged_caps.groupby(["CountyKey", "State"]).agg(
        n_caps=("Plan Type", "count"),
        plan_list=("plan_info", lambda x: list(x))
    ).reset_index()

    # Derive matching join keys on the geometry side (in place).
    counties_gdf["STATE"] = counties_gdf["FIPS"].str[:2].map(fips_to_abbrev)
    counties_gdf["CountyKey"] = counties_gdf["NAME"].apply(
        lambda x: x.upper().split(',')[0].replace(" COUNTY", "").strip()
    )

    merged_counties = counties_gdf.merge(
        grouped,
        left_on=["CountyKey", "STATE"],
        right_on=["CountyKey", "State"],
        how="left"
    )
    # Counties with no plans get 0 / an empty list rather than NaN.
    merged_counties["n_caps"] = merged_counties["n_caps"].fillna(0).astype(int)
    merged_counties["plan_list"] = merged_counties["plan_list"].apply(
        lambda x: x if isinstance(x, list) else []
    )
    # Tooltip columns: formatted population and zero-padded FIPS.
    merged_counties["POP_TT"] = merged_counties["POP"].apply(
        lambda x: f"{int(x):,}" if pd.notnull(x) and isinstance(x, (int, float)) else "No data"
    )
    merged_counties["FIPS_TT"] = merged_counties["FIPS"].apply(
        lambda x: x.zfill(5) if isinstance(x, str) and x.isdigit() else "No data"
    )
    return merged_counties
def load_city_mapping():
    """Load the city -> county/coordinates mapping used for map markers."""
    cities = pd.read_csv("city_county_mapping.csv")
    # Normalize the join keys.
    cities["City"] = cities["City"].str.strip()
    cities["State"] = cities["State"].str.strip().str.upper()
    # Coerce coordinates to floats; malformed values become NaN.
    for coord in ("Latitude", "Longitude"):
        cities[coord] = pd.to_numeric(cities[coord], errors="coerce")
    return cities
def load_city_plans():
    """Group climate action plan rows into per-(City, State) plan lists."""
    plans = pd.read_csv("caps_plans.csv")
    plans["City"] = plans["City"].str.strip()
    plans["State"] = plans["State"].str.strip().str.upper()
    # One display string per plan row: "Year, Plan Type".
    plans["plan_info"] = plans.apply(
        lambda row: f"{row['Year']}, {row['Plan Type']}", axis=1
    )
    return (
        plans.groupby(["City", "State"])
        .agg(plan_list=("plan_info", list))
        .reset_index()
    )
def merge_nri_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the NRI Future Risk Index data with the state and county
    GeoDataFrames.

    Note: converts counties_gdf_caps['FIPS'] to int in place so it lines
    up with the numeric STCOFIPS codes from the workbook.
    """
    id_cols = ["STATEABBRV", "STATE", "COUNTY", "STCOFIPS"]
    # Risk columns to carry through and average at the state level.
    risk_cols = [
        "CFLD_MID_HIGHER_PRISKS", "CFLD_LATE_HIGHER_PRISKS",
        "CFLD_MID_HIGHER_HM", "CFLD_LATE_HIGHER_HM",
        "WFIR_MID_HIGHER_PRISKS", "WFIR_LATE_HIGHER_PRISKS",
        "WFIR_MID_HIGHER_HM", "WFIR_LATE_HIGHER_HM",
        "DRGT_MID_HIGHER_PRISKS", "DRGT_LATE_HIGHER_PRISKS",
        "DRGT_MID_HIGHER_HM", "DRGT_LATE_HIGHER_HM",
        "HRCN_MID_HIGHER_PRISKS", "HRCN_LATE_HIGHER_PRISKS",
        "HRCN_MID_HIGHER_HM", "HRCN_LATE_HIGHER_HM",
        "EXHT_L95_MID_HIGHER_PRISKS", "EXHT_L95_LATE_HIGHER_PRISKS",
        "EXHT_L95_MID_HIGHER_HM", "EXHT_L95_LATE_HIGHER_HM",
    ]
    # Keep only the identifier + risk columns, rounding numeric values.
    nri_df = pd.read_excel("data/NRI Future Risk Index.xlsx")[id_cols + risk_cols].round(2)

    # State-level means over the risk columns, indexed by state name.
    grouped_states = nri_df.groupby("STATE")[risk_cols].mean().round(2)

    # Coerce both county FIPS columns to int so the merge keys match.
    counties_gdf_caps['FIPS'] = pd.to_numeric(counties_gdf_caps['FIPS'], errors='coerce').fillna(0).astype(int)
    nri_df['STCOFIPS'] = pd.to_numeric(nri_df['STCOFIPS'], errors='coerce').fillna(0).astype(int)

    # grouped_states is indexed by state name, so join against the index.
    merged_states_gdf = states_gdf_caps.merge(grouped_states, left_on="NAME", right_index=True, how="left")
    # County rows join the raw NRI records on the FIPS codes.
    merged_counties_gdf = counties_gdf_caps.merge(nri_df, left_on="FIPS", right_on="STCOFIPS", how="left")
    return merged_states_gdf, merged_counties_gdf
def merge_fema_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the FEMA data with the state and county GeoDataFrames.
    """
    fema_df = pd.read_csv("data/fema_data.csv")

    # Numeric FEMA columns to average.
    # Note: the ISTM columns are kept with their original names.
    fema_cols = [
        "RISK_SCORE", "RISK_SPCTL", "EAL_VALT", "SOVI_SCORE", "RESL_SCORE",
        "AVLN_EALS", "AVLN_EALT", "CFLD_EALS", "CFLD_EALT", "CWAV_EALS", "CWAV_EALT",
        "DRGT_EALS", "DRGT_EALT", "HAIL_EALS", "HAIL_EALT", "HWAV_EALS", "HWAV_EALT",
        "HRCN_EALS", "HRCN_EALT", "ISTM_EALS", "ISTM_EALT", "LNDS_EALS", "LNDS_EALT",
        "RFLD_EALS", "RFLD_EALT", "SWND_EALS", "SWND_EALT", "TRND_EALS", "TRND_EALT",
        "WFIR_EALS", "WFIR_EALT", "WNTW_EALS", "WNTW_EALT",
    ]

    # County- and state-level means; each result is indexed by its group key.
    by_county = fema_df.groupby("STCOFIPS")[fema_cols].mean().round(2)
    by_state = fema_df.groupby("STATEABBRV")[fema_cols].mean().round(2)

    # Join the aggregates back onto the GeoDataFrames via each index.
    merged_counties_gdf = counties_gdf_caps.merge(
        by_county, left_on="FIPS", right_index=True, how="left"
    )
    merged_states_gdf = states_gdf_caps.merge(
        by_state, left_on="STATE_ABBR", right_index=True, how="left"
    )
    return merged_states_gdf, merged_counties_gdf
def merge_cejst_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the CEJST data with the state and county GeoDataFrames.
    """
    keep_cols = [
        'County Name',
        'State/Territory',
        'Share of properties at risk of flood in 30 years (percentile)',
        'Share of properties at risk of flood in 30 years',
        'Share of properties at risk of fire in 30 years (percentile)',
        'Share of properties at risk of fire in 30 years',
        'Energy burden (percentile)',
        'PM2.5 in the air (percentile)',
        'PM2.5 in the air',
        "Share of the tract's land area that is covered by impervious surface or cropland as a percent",
        'Current asthma among adults aged greater than or equal to 18 years',
    ]
    # Restrict to the desired columns and round numeric values.
    cejst = pd.read_csv("data/cejst.csv")[keep_cols].round(2)

    numeric_cols = cejst.select_dtypes(include=['number']).columns
    # Average the tract-level rows up to (county, state) and to state.
    county_state_means = (
        cejst.groupby(['County Name', 'State/Territory'])[numeric_cols]
        .mean().round(2).reset_index()
    )
    state_means = (
        cejst.groupby('State/Territory')[numeric_cols]
        .mean().round(2).reset_index()
    )

    # Build a "County, State" key matching the counties' NAME column.
    county_state_means["NAME"] = (
        county_state_means["County Name"] + ", " + county_state_means["State/Territory"]
    )

    merged_counties_gdf = counties_gdf_caps.merge(county_state_means, on="NAME", how="left")
    merged_states_gdf = states_gdf_caps.merge(
        state_means, left_on="NAME", right_on="State/Territory", how="left"
    )
    return merged_states_gdf, merged_counties_gdf
if __name__ == "__main__":
    # Fetch and process Census data.
    state_df = fetch_state_data()
    county_df = fetch_county_data()

    # Build the base geometry layers with population + EPA regions.
    states_gdf = build_states_gdf(state_df, state_abbrev_to_fips)
    counties_gdf = build_counties_gdf(county_df)

    # Attach climate action plan information.
    states_gdf_caps = load_and_merge_caps(states_gdf)
    counties_gdf_caps = load_and_merge_caps_county(counties_gdf)

    # Layer in the external risk / demographic datasets.
    states_gdf_caps, counties_gdf_caps = merge_nri_data(states_gdf_caps, counties_gdf_caps)
    states_gdf_caps, counties_gdf_caps = merge_fema_data(states_gdf_caps, counties_gdf_caps)
    states_gdf_caps, counties_gdf_caps = merge_cejst_data(states_gdf_caps, counties_gdf_caps)

    city_mapping_df = load_city_mapping()
    city_plans_df = load_city_plans()

    # Save all data as binary (pickle) files for fast loading later.
    # Create the output directory first so a fresh checkout does not
    # crash inside to_pickle with FileNotFoundError.
    os.makedirs("./maps_helpers", exist_ok=True)
    state_df.to_pickle("./maps_helpers/state_df.pkl")
    county_df.to_pickle("./maps_helpers/county_df.pkl")
    states_gdf_caps.to_pickle("./maps_helpers/states_gdf_caps.pkl")
    counties_gdf_caps.to_pickle("./maps_helpers/counties_gdf_caps.pkl")
    city_mapping_df.to_pickle("./maps_helpers/city_mapping_df.pkl")
    city_plans_df.to_pickle("./maps_helpers/city_plans_df.pkl")