Spaces:
Sleeping
Sleeping
| # %% | |
| import polars as pl | |
| # Notice that the world health leaves missing as blanks in the csv. We need to explain that blanks aren't strings but missing values. | |
| dat = pl.read_csv("../API_Download_DS2_en_csv_v2_5657328.csv", skip_rows=4, null_values = "") | |
| # We don't like the World Banks wide format. Let's clean it upt to long format. | |
| dat_long = dat.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"]) | |
| # no we need to fix the year column and give it a better name. | |
| # we could have fixed the name as an argument in `.melt()` as well. | |
| # https://docs.pola.rs/user-guide/expressions/casting/#overflow | |
| dat_long = dat_long\ | |
| .with_columns( | |
| pl.col("variable").cast(pl.Int64, strict=False).alias("variable"), | |
| pl.col("value").cast(pl.Float32, strict=False).alias("value"))\ | |
| .rename({"variable":"year"})\ | |
| .filter(pl.col("value").is_not_null()) | |
| # %% | |
| # Can we split out the information in the indicator Code | |
| indicator_columns = dat_long\ | |
| .select( | |
| pl.col("Indicator Code"), | |
| pl.col("Indicator Code")\ | |
| # split string: example VC.IDP.TOCV into list object ["VC", "IDP", "TOCV"] | |
| .str.split_exact(".", 6).alias("split")).unnest("split")\ | |
| .unique() | |
| # What should we call these columns? | |
| # https://datahelpdesk.worldbank.org/knowledgebase/articles/201175-how-does-the-world-bank-code-its-indicators | |
| new_names = {"field_0":"topic", "field_1":"general_subj", "field_2":"specific_subj", | |
| "field_3":"ext_1", "field_4":"ext_2", "field_5":"ext_3", "field_6":"ext_4"} | |
| indicator_columns = indicator_columns.rename(new_names) | |
| # %% | |
| # now we need to finalize our munge and write our data | |
| dat_final = dat_long.join(indicator_columns, how="left", on="Indicator Code") | |
| # Now I want to reorder the columns | |
| names = dat_final.columns | |
| new_order = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 3, 0] | |
| name_order = [names[i] for i in new_order] | |
| dat_final = dat_final.select(name_order) | |
| # %% | |
| # write data | |
| dat_final.write_csv("../dat_munged.csv") | |
| # %% | |