# Hugging Face Space: sasha/energy-cost-compute (status: Sleeping) - file size: 4,524 Bytes - revision 726379a

import gradio as gr
import pandas as pd
import plotly.express as px

# Data loaded from merged_cloud_data.csv; the charts below read its
# "Total TDP (W)", "GPU Total Cost", "$/Hour", "provider" and "GPU Type" columns.
merged_df = pd.read_csv("merged_cloud_data.csv")


def _log_log_price_scatter(x_column, color_column):
    """Scatter "$/Hour" against *x_column* on log-log axes.

    Points are coloured by *color_column*, and a single OLS trendline is
    fitted in log-log space across all points (``trendline_scope="overall"``).
    """
    return px.scatter(
        merged_df,
        x=x_column,
        y="$/Hour",
        color=color_column,
        log_x=True,
        log_y=True,
        trendline="ols",
        # A fresh dict per call so the two figures never share an options object.
        trendline_options={"log_x": True, "log_y": True},
        trendline_scope="overall",
    )


# "$/Hour" versus "Total TDP (W)", coloured by provider.
tdp_fig = _log_log_price_scatter("Total TDP (W)", "provider")

# "$/Hour" versus "GPU Total Cost", coloured by GPU type.
cost_fig = _log_log_price_scatter("GPU Total Cost", "GPU Type")


# Bar colours keyed by the values of the "Environmental Transparency" column;
# passed to px.bar as ``color_discrete_map`` wherever that column drives colour.
color_discrete_map = {
    "Direct": "#2ca02c",  # green
    "Indirect": "#1f77b4",  # blue
    "None": "#d62728",  # red
}


def generate_figure(org_name):
    """Return a bar chart of one organization's models per year.

    Rows of the module-level ``data`` frame whose "Organization" column equals
    *org_name* are counted per (Year, Model, Environmental Transparency)
    combination. The counts are drawn as bars over "Year", coloured by
    "Environmental Transparency" using ``color_discrete_map``, with the model
    name shown on hover.

    NOTE(review): ``data`` is not defined in the visible part of this file;
    confirm it is loaded before this function is called.

    Args:
        org_name: Value to match against the "Organization" column.

    Returns:
        The Plotly figure produced by ``px.bar``, with a categorical x axis
        sorted in ascending category order.
    """
    belongs_to_org = data["Organization"] == org_name
    org_rows = data[belongs_to_org]

    per_year = org_rows.groupby("Year")[["Model", "Environmental Transparency"]]
    tally = per_year.value_counts().reset_index()
    # Give the count column a fixed name regardless of what pandas called it.
    tally.columns = ["Year", "Model", "Environmental Transparency", "Count"]

    chart = px.bar(
        tally,
        x="Year",
        y="Count",
        color="Environmental Transparency",
        color_discrete_map=color_discrete_map,
        hover_data=["Model"],
    )
    # Treat years as discrete labels rather than a continuous numeric axis.
    return chart.update_layout(xaxis_type="category").update_xaxes(
        categoryorder="category ascending"
    )


# Gradio UI: a title and methodology note, an "All Data" chart over every row
# of `data`, and a per-organization chart driven by a dropdown.
# NOTE(review): `data` and `organizations` are not defined in the visible part
# of this file — confirm they are created before this point.
with gr.Blocks() as demo:
    gr.Markdown("# Environmental Transparency Explorer Tool 🕵️‍♀️🌎")
    gr.Markdown(
        "## Explore the data from 'Misinformation by Omission: The Need for More Environmental Transparency in AI'"
    )
    with gr.Accordion("Methodology", open=False):
        gr.Markdown(
            'We analyzed Epoch AI\'s "Notable AI Models" dataset, which tracks information on “models that were state of the art, highly cited, \
        or otherwise historically notable” released over time. We selected the time period starting in 2010 as this is the beginning of the modern “deep learning era” \
        (as defined by Epoch AI), which is representative of the types of AI models currently trained and deployed, including all 754 models from 2010 \
        to the first quarter of 2025 in our analysis. We examined the level of environmental impact transparency for each model based on key information \
        from the Epoch AI dataset (e.g., model accessibility, training compute estimation method) as well as from individual model release content \
        (e.g., paper, model card, announcement).'
        )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### All Data")
            # Count rows per (Year, Model, Environmental Transparency) across all organizations.
            counts = (
                data.groupby("Year")[["Model", "Environmental Transparency"]]
                .value_counts()
                .reset_index()
            )
            counts.columns = ["Year", "Model", "Environmental Transparency", "Count"]
            fig2 = px.bar(
                counts,
                x="Year",
                y="Count",
                color="Environmental Transparency",
                color_discrete_map=color_discrete_map,
                hover_data=["Model"],
            )
            # Treat years as discrete labels, sorted in ascending category order.
            fig2.update_layout(xaxis_type="category")
            fig2.update_xaxes(categoryorder="category ascending")

            plt2 = gr.Plot(fig2)
    with gr.Row():
        with gr.Column(scale=1):
            # Initial dropdown value; the first per-organization chart is drawn for it too.
            initial_org = ""
            org_choice = gr.Dropdown(
                organizations,
                value=initial_org,
                label="Organizations",
                info="Pick an organization to explore their environmental disclosures",
                interactive=True,
            )
            gr.Markdown("The 3 transparency categories are:")
            gr.Markdown(
                "**Direct Disclosure**: Developers explicitly reported energy or GHG emissions, e.g., using hardware TDP, country average carbon intensity or measurements."
            )
            gr.Markdown(
                "**Indirect Disclosure**: Developers provided training compute data or released their model weights, allowing external estimates of training or inference impacts."
            )
            gr.Markdown(
                "**No Disclosure**: Environmental impact data was not publicly released and estimation approaches (as noted in Indirect Disclosure) were not possible."
            )
        with gr.Column(scale=4):
            gr.Markdown("### Data by Organization")
            # generate_figure filters on an organization *name*; pass the initial
            # value string, not the Dropdown component object itself.
            fig = generate_figure(initial_org)
            plt = gr.Plot(fig)

    # Redraw the per-organization chart whenever the user selects a dropdown entry.
    org_choice.select(generate_figure, inputs=[org_choice], outputs=[plt])

demo.launch()