Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import os, csv | |
| from huggingface_hub import hf_hub_download, HfApi | |
| import math | |
# Hugging Face access token, taken from the environment (None when unset).
HF_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN')

# Fetch the shared submissions CSV from the Hub dataset repo and remember the
# path of the local copy; write_to_csv appends new rows to this file.
CACHED_FILE_PATH = hf_hub_download(
    repo_type="dataset",
    repo_id="sasha/co2_submissions",
    filename="co2_emissions.csv",
)

# Hub client used to upload the updated CSV.
api = HfApi()
def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
    """Append one submission to the cached CSV and upload it to the Hub.

    The arguments are written, in the order given, as a single CSV row at the
    end of ``CACHED_FILE_PATH``. The whole file is then uploaded as
    ``co2_emissions.csv`` to the ``sasha/co2_submissions`` dataset repo.

    The session flag ``is_shared`` is set only after the upload call returns,
    so a failed write or upload does not make the page claim the data was
    shared.
    """
    row = [
        hardware, gpu_tdp, num_gpus, training_time, provider,
        carbon_intensity, dynamic_emissions, experimentation_time,
        experimental_emissions, pue, pue_emissions, embodied_type,
        embodied_emissions, model_info,
    ]
    # Explicit encoding so free-text fields (e.g. model_info) are written the
    # same way regardless of the host's locale.
    with open(CACHED_FILE_PATH, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(row)
    # Upload only once the file is closed, so the new row is flushed to disk.
    api.upload_file(
        path_or_fileobj=CACHED_FILE_PATH,
        path_in_repo="co2_emissions.csv",
        repo_id="sasha/co2_submissions",
        repo_type="dataset",
        # Pass the token read from the environment explicitly; when it is
        # None the client falls back to its own default credential lookup.
        token=HF_TOKEN,
    )
    st.session_state["is_shared"] = True
# Page-level settings: browser tab title and full-width layout.
st.set_page_config(layout="wide", page_title="AI Carbon Calculator")
# CSV files from the mlco2/impact GitHub repository.
tdp_url = (
    "https://raw.githubusercontent.com/mlco2/impact/master/data/"
    "gpus.csv"
)
compute_url = (
    "https://raw.githubusercontent.com/mlco2/impact/master/data/"
    "impact.csv"
)
electricity_url = (
    "https://raw.githubusercontent.com/mlco2/impact/master/data/"
    "2021-10-27yearly_averages.csv"
)

# Google Sheets workbook exported as CSV, one URL per sheet.
server_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
server_sheet_name = "Server%20Carbon%20Footprint"
server_url = (
    "https://docs.google.com/spreadsheets/d/" + server_sheet_id
    + "/gviz/tq?tqx=out:csv&sheet=" + server_sheet_name
)
embodied_gpu_sheet_name = "Scope%203%20Ratios"
embodied_gpu_url = (
    "https://docs.google.com/spreadsheets/d/" + server_sheet_id
    + "/gviz/tq?tqx=out:csv&sheet=" + embodied_gpu_sheet_name
)

# Hardware table (read below via its 'name', 'tdp_watts' and 'type' columns)
# and cloud instance table ('provider', 'region', 'impact', 'PUE').
TDP = pd.read_csv(tdp_url)
instances = pd.read_csv(compute_url)

# Provider names are shown upper-cased in the UI; the extra entry covers
# hardware that is not hosted by any listed provider.
providers = [
    name.upper() for name in instances['provider'].unique().tolist()
] + ['Local/Private Infastructure']
### Default values
# Placeholders that the widgets and calculations further down overwrite.
hardware = provider = model_info = "N/A"
gpu_tdp = num_gpus = 0
training_time = experimentation_time = 0.0
carbon_intensity = 0.0
dynamic_emissions = experimental_emissions = 0.0
pue = 1.0
pue_emissions = 0.0
embodied_type = 0.0
embodied_emissions = 0.0

### Conversion factors
# Used to express emissions (kg) as miles driven in an average US car.
kg_per_mile = 0.348
# Hourly manufacturing-emissions factor applied in the embodied estimate.
embodied_conversion_factor = 0.0289

# Remember across reruns whether this session has already shared its data.
if "is_shared" not in st.session_state:
    st.session_state["is_shared"] = False

# Remaining reference tables.
electricity = pd.read_csv(electricity_url)
servers = pd.read_csv(server_url)
embodied_gpu = pd.read_csv(embodied_gpu_url)
# Page title and introductory text, followed by the heading of the first
# calculator section.
#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
st.title("AI Carbon Calculator")
st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ππ₯οΈπ')
st.markdown('### Calculating the carbon footprint of AI models can be hard... this tool is here to help!')
# The second literal starts with a space: adjacent string literals are joined
# verbatim, and without it the text rendered as "footprintand".
st.markdown(
    '##### Use the calculators below to calculate different aspects of your model\'s carbon footprint'
    ' and don\'t forget to share your data to help the community better understand the carbon emissions of AI!'
)
st.markdown('### Dynamic Emissions π')
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
# Dynamic emissions calculator: hardware, device count, training time and the
# carbon intensity of the electricity used.
with st.expander("Calculate the dynamic emissions of your model"):
    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        hardware = st.selectbox('Hardware used', TDP['name'].tolist())
        # TDP (watts) of the selected hardware.
        gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
        st.markdown("Different hardware has different efficiencies, which impacts how much energy you use.")
    with col2:
        # A numeric widget instead of a free-text box: arbitrary text used to
        # reach float(num_gpus) below and crash the page.
        num_gpus = st.number_input('Number of GPUs/CPUs/TPUs used', min_value=1, value=16, step=1)
        st.markdown('If you can\'t find your hardware in the list, select the closest similar model.')
    with col3:
        training_time = st.number_input('Total training time (in hours)', min_value=0.0, value=0.0)
        st.markdown('You can find this number in your training logs or TensorBoards')
    with col4:
        provider = st.selectbox('Provider used', providers)
        st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
    with col5:
        if provider != 'Local/Private Infastructure':
            # Provider names are upper-cased for display; lower-case again to
            # match the instance table.
            provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
            region = st.selectbox('Region used', provider_instances)
            carbon_intensity = instances['impact'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
        else:
            carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh', min_value=0.0)
            st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
                        ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
    # watts * hours * (g CO2 / kWh) / 1e6 -> kilograms of CO2.
    dynamic_emissions = round(gpu_tdp * num_gpus * training_time * carbon_intensity / 1000000)
    st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
    st.info('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
            ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
# Experimental emissions: electricity used by the ablations, baselines and
# other runs carried out before the final training run.
st.markdown('### Experimental Emissions 👩‍🔬')
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
            'and parameters.')
with st.expander("Calculate the experimental emissions of your model"):
    st.info('Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
    # Defaults to the training time entered in the dynamic-emissions section.
    experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
    st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
                ' found that experimentation roughly doubles the amount of compute used by training the model itself.')
    # Same formula as the dynamic emissions, including the device count. The
    # count was previously left out, so the default (experimentation hours ==
    # training hours) under-reported by a factor of num_gpus instead of
    # matching the training emissions as the baseline above describes.
    # NOTE(review): assumes the hours entered are wall-clock hours on the same
    # devices, not device-hours - confirm with the methodology owner.
    experimental_emissions = round(gpu_tdp * float(num_gpus) * experimentation_time * carbon_intensity / 1000000)
    st.metric(label="Experimental emissions", value=str(experimental_emissions)+' kilograms of CO2eq')
# Datacenter overhead: scale the compute emissions by the PUE of the
# datacenter (or of the user's own infrastructure).
st.markdown('### Datacenter (Overhead) Emissions π')
# The second literal starts with a space; without it the text rendered as
# "infrastructureused".
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
            ' used for model training -- the datacenter, network, heating/cooling, storage, etc.')
with st.expander("Calculate the idle emissions of your model"):
    st.info('A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
            ' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
    _is_local = provider == 'Local/Private Infastructure'
    if _is_local:
        # No region is selected for local infrastructure, so there is nothing
        # to look up (the lookup used to raise NameError on `region` here).
        pue = math.nan
    else:
        _pue_matches = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()
        pue = _pue_matches[0] if _pue_matches else math.nan
    if math.isnan(pue):
        if not _is_local:
            st.markdown('##### The exact information isn\'t available for this datacenter! We will use your provider\'s average instead, which is:')
            # Provider-wide average PUE and the page it was taken from.
            _provider_averages = {
                'AWS': (1.135, "https://www.cloudcarbonfootprint.org/docs/methodology/"),
                'GCP': (1.1, "https://www.google.ca/about/datacenters/efficiency/"),
                'AZURE': (1.185, "https://www.cloudcarbonfootprint.org/docs/methodology/"),
                'OVH': (1.28, "https://corporate.ovhcloud.com/en-ca/sustainability/environment/"),
                'SCALEWAY': (1.35, "https://pue.dc3.scaleway.com/en/"),
            }
            if provider in _provider_averages:
                pue, _source_url = _provider_averages[provider]
                st.markdown('#### ' + str(pue) + " [(source)](" + _source_url + ")")
            else:
                # No provider average is known either: use the industry
                # average rather than leaving NaN, which makes round() raise.
                pue = 1.58
                st.markdown('#### ' + str(pue) + " (industry average; no provider-specific figure is available)")
        else:
            st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
            # Explicit bounds: a float slider defaults to the range 0.0-1.0,
            # which rejects the 1.58 default.
            pue = st.slider('PUE of your infrastructure', min_value=1.0, max_value=3.0, value=1.58, step=0.01)
    else:
        st.markdown('##### The PUE of the datacenter you used is: ')
        st.markdown('#### '+ str(pue))
    pue_emissions = round((experimental_emissions + dynamic_emissions) * pue)
    st.metric(label="Emissions considering PUE", value=str(pue_emissions)+' kilograms of CO2eq')
# Embodied emissions: manufacturing footprint of the hardware, scaled by the
# device count and training time entered earlier on the page.
st.markdown('### Embodied Emissions π₯οΈπ¨')
st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
            ' the computing equipment needed for AI models.')
with st.expander("Calculate the embodied emissions of your model"):
    st.markdown('These are the trickiest emissions to track down since a lot of the information needed is missing.')
    st.markdown('##### Based on the number of GPUs and training time you indicated above, we can estimate that your model\'s embodied emissions are approximately: ')
    hardware_type = TDP['type'][TDP['name'] == hardware].tolist()[0]
    # Row labels are matched exactly against the sheet's 'Ratio' column. The
    # subscript two in "CO₂" had been mis-decoded to "COβ", so no row matched
    # and the [0] lookup raised IndexError.
    # NOTE(review): "additionnal" is kept as found, presumably mirroring the
    # sheet's own spelling - verify both labels against the live sheet.
    if hardware_type == 'cpu':
        embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio'] == 'Manufacturing emissions per additional CPU (kgCO₂eq)'].tolist()[0]
    elif hardware_type in ('gpu', 'tpu'):
        embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio'] == 'Manufacturing emissions per additionnal GPU Card (kgCO₂eq)'].tolist()[0]
    # Any other hardware type keeps the embodied_type value set earlier.
    embodied_emissions = round(int(embodied_type) * embodied_conversion_factor * float(num_gpus) * training_time / 1000, 1)
    st.metric(label="Embodied emissions", value=str(embodied_emissions)+' kilograms of CO2eq')
    st.markdown('This is a high-level estimate based on an hourly manufacturing emissions conversion factor (linearly ammortised) of 0.0289 [(source)](https://docs.google.com/spreadsheets/d/1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k/).')
# Optional model link, then the button that shares the computed values.
st.markdown('### Model Information ℹ️')
st.markdown('##### If you want to share the link to your model code or paper, please do so below! Otherwise, your submission will be anonymous.')
model_info = st.text_input(label= "Enter a link to your model (optional)")

# Style the share button (green, enlarged).
m = st.markdown("""
<style>
div.stButton > button:first-child {
background-color: rgb(80, 200, 120);
background-image: none;
font-size: 25px;
height: 3em;
width: 15em;
}
</style>""", unsafe_allow_html=True)

buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
with buttoncol2:
    if not st.session_state["is_shared"]:
        # Hand the values to the callback explicitly through `args` instead of
        # a lambda closing over module globals: the row written is exactly
        # what was computed when the button was rendered.
        submitted = st.button(
            label="Share my CO2 data!",
            on_click=write_to_csv,
            args=(
                hardware, gpu_tdp, num_gpus, training_time, provider,
                carbon_intensity, dynamic_emissions, experimentation_time,
                experimental_emissions, pue, pue_emissions, embodied_type,
                embodied_emissions, model_info,
            ),
        )
    else:
        st.info('Thank you! Your data has been shared in https://huggingface.co/datasets/sasha/co2_submissions.')
# Methodology section: background text and the LCA diagram.
st.markdown('### Methodology')
with st.expander("More information about our Methodology"):
    st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
                ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
    # The trailing ')' literal that followed the link was dropped: it was
    # concatenated onto the string and rendered as a stray parenthesis.
    st.markdown('We considered all of these aspects when calculating the CO2 emissions of BLOOM 🌸, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
    st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')