Spaces:
Runtime error
Runtime error
sashavor
committed on
Commit
·
ce8bd36
1
Parent(s):
795ccdc
lotsa changes
Browse files
app.py
CHANGED
|
@@ -2,10 +2,11 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import os, csv
|
| 4 |
from huggingface_hub import hf_hub_download, HfApi
|
|
|
|
| 5 |
|
| 6 |
HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
|
| 7 |
|
| 8 |
-
CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="
|
| 9 |
|
| 10 |
api = HfApi()
|
| 11 |
|
|
@@ -15,18 +16,18 @@ def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_em
|
|
| 15 |
writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
|
| 16 |
api.upload_file(
|
| 17 |
path_or_fileobj=CACHED_FILE_PATH,
|
| 18 |
-
path_in_repo="
|
| 19 |
repo_id="sasha/co2_submissions",
|
| 20 |
repo_type="dataset",
|
| 21 |
)
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
st.set_page_config(
|
| 26 |
page_title="AI Carbon Calculator",
|
| 27 |
layout="wide",
|
| 28 |
)
|
| 29 |
|
|
|
|
|
|
|
| 30 |
tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
|
| 31 |
compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
|
| 32 |
|
|
@@ -38,9 +39,8 @@ server_sheet_name = "Server%20Carbon%20Footprint"
|
|
| 38 |
server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
|
| 39 |
|
| 40 |
|
| 41 |
-
embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
|
| 42 |
embodied_gpu_sheet_name = "Scope%203%20Ratios"
|
| 43 |
-
embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{
|
| 44 |
|
| 45 |
TDP =pd.read_csv(tdp_url)
|
| 46 |
|
|
@@ -52,18 +52,21 @@ kg_per_mile = 0.348
|
|
| 52 |
|
| 53 |
electricity = pd.read_csv(electricity_url)
|
| 54 |
servers = pd.read_csv(server_url)
|
|
|
|
| 55 |
embodied_gpu = pd.read_csv(embodied_gpu_url)
|
|
|
|
| 56 |
|
| 57 |
#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
|
| 58 |
st.title("AI Carbon Calculator")
|
| 59 |
|
| 60 |
-
st.markdown('## Estimate your model\'s CO2 carbon footprint!')
|
| 61 |
|
| 62 |
-
st.markdown('#####
|
| 63 |
-
|
|
|
|
| 64 |
|
| 65 |
-
st.markdown('### Dynamic Emissions')
|
| 66 |
-
st.markdown('##### These are the
|
| 67 |
with st.expander("Calculate the dynamic emissions of your model"):
|
| 68 |
col1, col2, col3, col4 = st.columns(4)
|
| 69 |
with col1:
|
|
@@ -91,31 +94,73 @@ with st.expander("Calculate the dynamic emissions of your model"):
|
|
| 91 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
| 92 |
st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
|
| 93 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
| 94 |
-
st.button(label="Anonymously share my data", help="Share the data from your model anonymously for research purposes!",\
|
| 95 |
-
on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
|
| 96 |
-
|
| 97 |
-
st.markdown('### Idle Emissions')
|
| 98 |
-
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
| 99 |
-
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
-
st.markdown('###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
| 105 |
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
| 106 |
with st.expander("Calculate the idle emissions of your model"):
|
| 107 |
-
st.markdown('
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
st.markdown('
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
with st.expander("More information about our Methodology"):
|
| 117 |
st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
|
| 118 |
' other aspects of your model\'s carbon footprint based on the LCA methodology.')
|
| 119 |
-
|
| 120 |
-
|
| 121 |
st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import os, csv
|
| 4 |
from huggingface_hub import hf_hub_download, HfApi
|
| 5 |
+
import math
|
| 6 |
|
| 7 |
HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
|
| 8 |
|
| 9 |
+
CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co2_emissions.csv", repo_type="dataset")
|
| 10 |
|
| 11 |
api = HfApi()
|
| 12 |
|
|
|
|
| 16 |
writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
|
| 17 |
api.upload_file(
|
| 18 |
path_or_fileobj=CACHED_FILE_PATH,
|
| 19 |
+
path_in_repo="co2_emissions.csv",
|
| 20 |
repo_id="sasha/co2_submissions",
|
| 21 |
repo_type="dataset",
|
| 22 |
)
|
| 23 |
|
|
|
|
|
|
|
| 24 |
st.set_page_config(
|
| 25 |
page_title="AI Carbon Calculator",
|
| 26 |
layout="wide",
|
| 27 |
)
|
| 28 |
|
| 29 |
+
|
| 30 |
+
|
| 31 |
tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
|
| 32 |
compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
|
| 33 |
|
|
|
|
| 39 |
server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
|
| 40 |
|
| 41 |
|
|
|
|
| 42 |
embodied_gpu_sheet_name = "Scope%203%20Ratios"
|
| 43 |
+
embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"
|
| 44 |
|
| 45 |
TDP =pd.read_csv(tdp_url)
|
| 46 |
|
|
|
|
| 52 |
|
| 53 |
electricity = pd.read_csv(electricity_url)
|
| 54 |
servers = pd.read_csv(server_url)
|
| 55 |
+
#print(servers.columns)
|
| 56 |
embodied_gpu = pd.read_csv(embodied_gpu_url)
|
| 57 |
+
#print(embodied_gpu.columns)
|
| 58 |
|
| 59 |
#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
|
| 60 |
st.title("AI Carbon Calculator")
|
| 61 |
|
| 62 |
+
st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ππ₯οΈπ')
|
| 63 |
|
| 64 |
+
st.markdown('##### The calculators below will help you calculate different aspects of your model\'s carbon footprint, as we did for'
|
| 65 |
+
' BLOOM 🌸, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
|
| 66 |
+
st.markdown('##### Don\'t forget to share your data to help us get a better idea of AI model\'s carbon emissions!')
|
| 67 |
|
| 68 |
+
st.markdown('### Dynamic Emissions π')
|
| 69 |
+
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
|
| 70 |
with st.expander("Calculate the dynamic emissions of your model"):
|
| 71 |
col1, col2, col3, col4 = st.columns(4)
|
| 72 |
with col1:
|
|
|
|
| 94 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
| 95 |
st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
|
| 96 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
+
st.markdown('### Experimental Emissions 👩‍🔬')
|
| 99 |
+
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
|
| 100 |
+
'and parameters.')
|
| 101 |
+
with st.expander("Calculate the experimental emissions of your model"):
|
| 102 |
+
st.markdown('##### Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
|
| 103 |
+
experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
|
| 104 |
+
st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
|
| 105 |
+
' found that experimentation roughly doubles the amount of compute used by training the model itself.')
|
| 106 |
+
experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
|
| 107 |
+
st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
|
| 108 |
+
|
| 109 |
+
st.markdown('### Idle Emissions π')
|
| 110 |
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
| 111 |
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
| 112 |
with st.expander("Calculate the idle emissions of your model"):
|
| 113 |
+
st.markdown('##### A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
|
| 114 |
+
' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
|
| 115 |
+
pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
|
| 116 |
+
if math.isnan(pue) == True:
|
| 117 |
+
if provider != 'Local/Private Infastructure':
|
| 118 |
+
st.markdown('##### The exact information isn\'t available for this datacenter! We will use your provider\'s average instead, which is:')
|
| 119 |
+
if provider == 'AWS':
|
| 120 |
+
pue = 1.135
|
| 121 |
+
st.markdown('#### ' + str(pue)+ " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
|
| 122 |
+
elif provider == 'GCP':
|
| 123 |
+
pue = 1.1
|
| 124 |
+
st.markdown('#### ' + str(pue) + " [(source)](https://www.google.ca/about/datacenters/efficiency/)")
|
| 125 |
+
elif provider == 'AZURE':
|
| 126 |
+
pue = 1.185
|
| 127 |
+
st.markdown('#### ' + str(pue) + " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
|
| 128 |
+
elif provider == 'OVH':
|
| 129 |
+
pue = 1.28
|
| 130 |
+
st.markdown('#### ' + str(pue) + " [(source)](https://corporate.ovhcloud.com/en-ca/sustainability/environment/)")
|
| 131 |
+
elif provider == 'SCALEWAY':
|
| 132 |
+
pue = 1.35
|
| 133 |
+
st.markdown('#### ' +str(pue) + " [(source)](https://pue.dc3.scaleway.com/en/)")
|
| 134 |
|
| 135 |
+
else:
|
| 136 |
+
st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
|
| 137 |
+
pue = st.number_input('Total number of GPU hours', value = 1.58)
|
| 138 |
+
else:
|
| 139 |
+
st.markdown('##### The PUE of the datacenter you used is: ')
|
| 140 |
+
st.markdown('#### '+ str(pue))
|
| 141 |
+
pue_emissions = round((experimental_emissions+ dynamic_emissions)*pue)
|
| 142 |
+
st.metric(label="Emissions considering PUE", value=str(pue_emissions)+' kilograms of CO2eq')
|
| 143 |
+
|
| 144 |
+
st.markdown('### Embodied Emissions π₯οΈπ¨')
|
| 145 |
+
st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
|
| 146 |
+
' the computing equipment needed for AI models.')
|
| 147 |
+
with st.expander("Calculate the embodied emissions of your model"):
|
| 148 |
+
st.markdown('##### These are the trickiest emissions to track down since a lot of the information needed is missing!')
|
| 149 |
+
|
| 150 |
+
m = st.markdown("""
|
| 151 |
+
<style>
|
| 152 |
+
div.stButton > button:first-child {
|
| 153 |
+
background-color: rgb(80, 200, 120);
|
| 154 |
+
font-size: 20px;
|
| 155 |
+
height: 3em;
|
| 156 |
+
}
|
| 157 |
+
</style>""", unsafe_allow_html=True)
|
| 158 |
+
buttoncol1, cuttoncol2, buttoncol3 = st.columns(3)
|
| 159 |
+
with cuttoncol2:
|
| 160 |
+
st.button(label="Anonymously share my data!", on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
|
| 161 |
+
|
| 162 |
+
st.markdown('### Methodology')
|
| 163 |
with st.expander("More information about our Methodology"):
|
| 164 |
st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
|
| 165 |
' other aspects of your model\'s carbon footprint based on the LCA methodology.')
|
|
|
|
|
|
|
| 166 |
st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
|