sasha's picture
sasha HF Staff
adding data, app copy
726379a
raw
history blame
4.52 kB
import gradio as gr
import pandas as pd
import plotly.express as px
# Category -> bar color lookup handed to px.bar wherever a chart is colored by
# the "Environmental Transparency" column.
color_discrete_map = {
    "Direct": "#2ca02c",
    "Indirect": "#1f77b4",
    "None": "#d62728",
}

# Table backing the two scatter plots below.
merged_df = pd.read_csv("merged_cloud_data.csv")

# NOTE(review): neither tdp_fig nor cost_fig is referenced again in the part of
# the file visible here — confirm whether they are still meant to be displayed.

# Hourly price against total TDP on log-log axes, one color per provider, with
# a single OLS trendline fitted in log-log space across all points.
tdp_fig = px.scatter(
    data_frame=merged_df,
    x="Total TDP (W)",
    y="$/Hour",
    color="provider",
    log_x=True,
    log_y=True,
    trendline="ols",
    trendline_scope="overall",
    trendline_options={"log_x": True, "log_y": True},
)

# Hourly price against total GPU cost on log-log axes, one color per GPU type,
# with the same kind of overall log-log OLS trendline.
cost_fig = px.scatter(
    data_frame=merged_df,
    x="GPU Total Cost",
    y="$/Hour",
    color="GPU Type",
    log_x=True,
    log_y=True,
    trendline="ols",
    trendline_scope="overall",
    trendline_options={"log_x": True, "log_y": True},
)
def generate_figure(org_name):
    """Build the per-year bar chart of models for one organization.

    Rows of the module-level ``data`` table whose "Organization" equals
    ``org_name`` are grouped by "Year", and each distinct
    ("Model", "Environmental Transparency") pair is counted within its year.
    The counts are drawn as bars colored by transparency category using
    ``color_discrete_map``; the model name is shown on hover.

    NOTE(review): ``data`` is not defined in the part of the file visible
    here — confirm it is loaded before this function is called.

    Args:
        org_name: Value compared against the "Organization" column.

    Returns:
        The figure produced by ``px.bar``, with the x-axis treated as a
        category axis sorted in ascending order.
    """
    belongs_to_org = data["Organization"] == org_name
    by_year = data[belongs_to_org].groupby("Year")
    tallies = by_year[["Model", "Environmental Transparency"]].value_counts()

    model_counts = tallies.reset_index()
    # Name the count column explicitly rather than relying on whatever label
    # reset_index() gives it.
    model_counts.columns = ["Year", "Model", "Environmental Transparency", "Count"]

    chart = px.bar(
        data_frame=model_counts,
        x="Year",
        y="Count",
        color="Environmental Transparency",
        color_discrete_map=color_discrete_map,
        hover_data=["Model"],
    )
    # Treat years as discrete labels and keep them in ascending order.
    chart.update_layout(xaxis_type="category")
    chart.update_xaxes(categoryorder="category ascending")
    return chart
# Gradio UI: a title, a collapsible methodology note, an overview chart built
# from the full table, and a dropdown-driven per-organization chart.
#
# NOTE(review): `data` and `organizations` are used below but are not defined
# in the part of the file visible here — confirm both exist before this block
# runs, otherwise it fails with a NameError.
with gr.Blocks() as demo:
    gr.Markdown("# Environmental Transparency Explorer Tool 🕵️‍♀️🌎")
    gr.Markdown(
        "## Explore the data from 'Misinformation by Omission: The Need for More Environmental Transparency in AI'"
    )

    with gr.Accordion("Methodology", open=False):
        gr.Markdown(
            'We analyzed Epoch AI\'s "Notable AI Models" dataset, which tracks information on “models that were state of the art, highly cited, '
            "or otherwise historically notable” released over time. We selected the time period starting in 2010 as this is the beginning of the modern “deep learning era” "
            "(as defined by Epoch AI), which is representative of the types of AI models currently trained and deployed, including all 754 models from 2010 "
            "to the first quarter of 2025 in our analysis. We examined the level of environmental impact transparency for each model based on key information "
            "from the Epoch AI dataset (e.g., model accessibility, training compute estimation method) as well as from individual model release content "
            "(e.g., paper, model card, announcement)."
        )

    # Overview: the same per-year count chart as generate_figure(), but over
    # every row of `data` instead of a single organization.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### All Data")
            counts = (
                data.groupby("Year")[["Model", "Environmental Transparency"]]
                .value_counts()
                .reset_index()
            )
            counts.columns = ["Year", "Model", "Environmental Transparency", "Count"]
            fig2 = px.bar(
                counts,
                x="Year",
                y="Count",
                color="Environmental Transparency",
                color_discrete_map=color_discrete_map,
                hover_data=["Model"],
            )
            # Years are shown as discrete labels in ascending order.
            fig2.update_layout(xaxis_type="category")
            fig2.update_xaxes(categoryorder="category ascending")
            plt2 = gr.Plot(fig2)

    # Per-organization view: selector and category legend on the left, chart
    # on the right.
    with gr.Row():
        with gr.Column(scale=1):
            org_choice = gr.Dropdown(
                organizations,
                value="",
                label="Organizations",
                info="Pick an organization to explore their environmental disclosures",
                interactive=True,
            )
            gr.Markdown("The 3 transparency categories are:")
            gr.Markdown(
                "**Direct Disclosure**: Developers explicitly reported energy or GHG emissions, e.g., using hardware TDP, country average carbon intensity or measurements."
            )
            gr.Markdown(
                "**Indirect Disclosure**: Developers provided training compute data or released their model weights, allowing external estimates of training or inference impacts."
            )
            gr.Markdown(
                "**No Disclosure**: Environmental impact data was not publicly released and estimation approaches (as noted in Indirect Disclosure) were not possible."
            )
        with gr.Column(scale=4):
            gr.Markdown("### Data by Organization")
            # Seed the plot from the dropdown's initial value. generate_figure
            # compares its argument against organization names, so it needs
            # the selected string, not the Dropdown component object itself.
            fig = generate_figure(org_choice.value)
            plt = gr.Plot(fig)

    # Redraw the per-organization chart whenever the user picks an entry.
    org_choice.select(generate_figure, inputs=[org_choice], outputs=[plt])

demo.launch()