Spaces:

sasha
/

energy-cost-compute

Sleeping

App Files Files Community

energy-cost-compute / app.py

sasha HF Staff

adding data, app copy

726379a about 2 months ago

raw

history blame

4.52 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px

	# Table backing both scatter plots below.
	merged_df = pd.read_csv("merged_cloud_data.csv")


	def _price_scatter(x_column, color_column):
	    """Scatter "$/Hour" against ``x_column`` on log-log axes.

	    Points are coloured by ``color_column``. A single OLS trendline is drawn
	    for the whole figure (``trendline_scope="overall"``) with the ``log_x``
	    and ``log_y`` trendline options enabled.
	    """
	    return px.scatter(
	        merged_df,
	        x=x_column,
	        y="$/Hour",
	        color=color_column,
	        log_x=True,
	        log_y=True,
	        trendline="ols",
	        trendline_options={"log_x": True, "log_y": True},
	        trendline_scope="overall",
	    )


	# Hourly price versus "Total TDP (W)", coloured by provider.
	tdp_fig = _price_scatter("Total TDP (W)", "provider")

	# Hourly price versus "GPU Total Cost", coloured by GPU type.
	cost_fig = _price_scatter("GPU Total Cost", "GPU Type")


	# Fixed bar colour for each "Environmental Transparency" category, so the
	# overall chart and the per-organization chart use the same palette.
	color_discrete_map = {
	    "Direct": "#2ca02c",
	    "Indirect": "#1f77b4",
	    "None": "#d62728",
	}


	def generate_figure(org_name):
	    """Return a bar chart of one organization's models per year.

	    Rows of the module-level ``data`` frame whose "Organization" column equals
	    ``org_name`` are grouped by "Year" and counted per
	    ("Model", "Environmental Transparency") combination. The counts are plotted
	    with "Year" on the x axis, coloured by transparency category using
	    ``color_discrete_map``, with the model name shown on hover.

	    NOTE(review): ``data`` is not defined in the visible part of this file;
	    confirm it is loaded before this function is called.
	    """
	    belongs_to_org = data["Organization"] == org_name
	    by_year = data[belongs_to_org].groupby("Year")
	    tally = by_year[["Model", "Environmental Transparency"]].value_counts()
	    tally = tally.reset_index()
	    tally.columns = ["Year", "Model", "Environmental Transparency", "Count"]

	    chart = px.bar(
	        tally,
	        x="Year",
	        y="Count",
	        color="Environmental Transparency",
	        color_discrete_map=color_discrete_map,
	        hover_data=["Model"],
	    )
	    # Treat years as discrete categories and list them in ascending order.
	    chart.update_layout(xaxis_type="category")
	    chart.update_xaxes(categoryorder="category ascending")
	    return chart


	# Build the Gradio page: title, collapsible methodology text, a chart over all
	# rows of `data`, and a per-organization chart driven by a dropdown.
	# NOTE(review): `data` and `organizations` are used below but are not defined
	# in the visible part of this file; confirm they are created before this point.
	# NOTE(review): `tdp_fig` and `cost_fig` are built earlier in the file but are
	# never rendered by this layout; confirm whether they should be shown.
	with gr.Blocks() as demo:
	    gr.Markdown("# Environmental Transparency Explorer Tool 🕵️‍♀️🌎")
	    gr.Markdown(
	        "## Explore the data from 'Misinformation by Omission: The Need for More Environmental Transparency in AI'"
	    )
	    with gr.Accordion("Methodology", open=False):
	        # The continuation lines of this literal are deliberately not indented
	        # further: whitespace after a backslash-newline is part of the string.
	        gr.Markdown(
	            'We analyzed Epoch AI\'s "Notable AI Models" dataset, which tracks information on “models that were state of the art, highly cited, \
	or otherwise historically notable” released over time. We selected the time period starting in 2010 as this is the beginning of the modern “deep learning era” \
	(as defined by Epoch AI), which is representative of the types of AI models currently trained and deployed, including all 754 models from 2010 \
	to the first quarter of 2025 in our analysis. We examined the level of environmental impact transparency for each model based on key information \
	from the Epoch AI dataset (e.g., model accessibility, training compute estimation method) as well as from individual model release content \
	(e.g., paper, model card, announcement).'
	        )
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown("### All Data")
	            # Same counting and plotting steps as generate_figure, applied to
	            # every row of `data` instead of a single organization.
	            counts = (
	                data.groupby("Year")[["Model", "Environmental Transparency"]]
	                .value_counts()
	                .reset_index()
	            )
	            counts.columns = ["Year", "Model", "Environmental Transparency", "Count"]
	            fig2 = px.bar(
	                counts,
	                x="Year",
	                y="Count",
	                color="Environmental Transparency",
	                color_discrete_map=color_discrete_map,
	                hover_data=["Model"],
	            )
	            fig2.update_layout(xaxis_type="category")
	            fig2.update_xaxes(categoryorder="category ascending")

	            plt2 = gr.Plot(fig2)
	    with gr.Row():
	        with gr.Column(scale=1):
	            org_choice = gr.Dropdown(
	                organizations,
	                value="",
	                label="Organizations",
	                info="Pick an organization to explore their environmental disclosures",
	                interactive=True,
	            )
	            gr.Markdown("The 3 transparency categories are:")
	            gr.Markdown(
	                "Direct Disclosure: Developers explicitly reported energy or GHG emissions, e.g., using hardware TDP, country average carbon intensity or measurements."
	            )
	            gr.Markdown(
	                "Indirect Disclosure: Developers provided training compute data or released their model weights, allowing external estimates of training or inference impacts."
	            )
	            gr.Markdown(
	                "No Disclosure: Environmental impact data was not publicly released and estimation approaches (as noted in Indirect Disclosure) were not possible."
	            )
	        with gr.Column(scale=4):
	            gr.Markdown("### Data by Organization")
	            # Seed the plot from the dropdown's initial value. The previous code
	            # passed the Dropdown component itself, which generate_figure then
	            # compared against the "Organization" column instead of a name.
	            fig = generate_figure(org_choice.value)
	            plt = gr.Plot(fig)

	    # Redraw the per-organization chart whenever a dropdown entry is selected;
	    # Gradio passes the selected value to generate_figure.
	    org_choice.select(generate_figure, inputs=[org_choice], outputs=[plt])

	demo.launch()