Spaces:
Runtime error
Runtime error
sashavor
committed on
Commit
·
419df8a
1
Parent(s):
a23bf4a
big overhaul
Browse files
app.py
CHANGED
|
@@ -10,10 +10,10 @@ CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co
|
|
| 10 |
|
| 11 |
api = HfApi()
|
| 12 |
|
| 13 |
-
def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
|
| 14 |
with open(CACHED_FILE_PATH,'a', newline='') as f:
|
| 15 |
writer = csv.writer(f)
|
| 16 |
-
writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
|
| 17 |
api.upload_file(
|
| 18 |
path_or_fileobj=CACHED_FILE_PATH,
|
| 19 |
path_in_repo="co2_emissions.csv",
|
|
@@ -48,39 +48,55 @@ instances = pd.read_csv(compute_url)
|
|
| 48 |
providers = [p.upper() for p in instances['provider'].unique().tolist()]
|
| 49 |
providers.append('Local/Private Infastructure')
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
kg_per_mile = 0.348
|
|
|
|
| 52 |
|
| 53 |
electricity = pd.read_csv(electricity_url)
|
| 54 |
servers = pd.read_csv(server_url)
|
| 55 |
-
#print(servers.columns)
|
| 56 |
embodied_gpu = pd.read_csv(embodied_gpu_url)
|
| 57 |
-
#print(embodied_gpu.columns)
|
| 58 |
-
|
| 59 |
#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
|
| 60 |
st.title("AI Carbon Calculator")
|
| 61 |
|
| 62 |
st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ๐๐ฅ๏ธ๐')
|
| 63 |
-
|
| 64 |
-
st.markdown('#####
|
| 65 |
-
'
|
| 66 |
-
st.markdown('##### Don\'t forget to share your data to help us get a better idea of AI model\'s carbon emissions!')
|
| 67 |
|
| 68 |
st.markdown('### Dynamic Emissions ๐')
|
| 69 |
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
|
| 70 |
with st.expander("Calculate the dynamic emissions of your model"):
|
| 71 |
-
col1, col2, col3, col4 = st.columns(
|
| 72 |
with col1:
|
| 73 |
-
hardware = st.selectbox('
|
| 74 |
gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
|
| 75 |
-
st.markdown("Different
|
| 76 |
with col2:
|
| 77 |
-
|
| 78 |
-
st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
|
| 79 |
-
|
| 80 |
with col3:
|
|
|
|
|
|
|
| 81 |
provider = st.selectbox('Provider used', providers)
|
| 82 |
st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
|
| 83 |
-
with
|
| 84 |
if provider != 'Local/Private Infastructure':
|
| 85 |
provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
|
| 86 |
region = st.selectbox('Provider used', provider_instances)
|
|
@@ -90,7 +106,7 @@ with st.expander("Calculate the dynamic emissions of your model"):
|
|
| 90 |
carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
|
| 91 |
st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
|
| 92 |
' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
|
| 93 |
-
dynamic_emissions = round(gpu_tdp * training_time * carbon_intensity/1000000)
|
| 94 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
| 95 |
st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
|
| 96 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
|
@@ -106,7 +122,7 @@ with st.expander("Calculate the experimental emissions of your model"):
|
|
| 106 |
experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
|
| 107 |
st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
|
| 108 |
|
| 109 |
-
st.markdown('###
|
| 110 |
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
| 111 |
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
| 112 |
with st.expander("Calculate the idle emissions of your model"):
|
|
@@ -134,7 +150,7 @@ with st.expander("Calculate the idle emissions of your model"):
|
|
| 134 |
|
| 135 |
else:
|
| 136 |
st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
|
| 137 |
-
pue = st.
|
| 138 |
else:
|
| 139 |
st.markdown('##### The PUE of the datacenter you used is: ')
|
| 140 |
st.markdown('#### '+ str(pue))
|
|
@@ -145,23 +161,38 @@ st.markdown('### Embodied Emissions ๐ฅ๏ธ๐จ')
|
|
| 145 |
st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
|
| 146 |
' the computing equipment needed for AI models.')
|
| 147 |
with st.expander("Calculate the embodied emissions of your model"):
|
| 148 |
-
st.markdown('
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
m = st.markdown("""
|
| 151 |
<style>
|
| 152 |
div.stButton > button:first-child {
|
| 153 |
background-color: rgb(80, 200, 120);
|
| 154 |
background-image: none;
|
| 155 |
-
font-size:
|
| 156 |
height: 3em;
|
|
|
|
| 157 |
}
|
| 158 |
</style>""", unsafe_allow_html=True)
|
| 159 |
-
buttoncol1,
|
| 160 |
-
with
|
| 161 |
-
st.button(label="
|
| 162 |
|
| 163 |
st.markdown('### Methodology')
|
| 164 |
with st.expander("More information about our Methodology"):
|
| 165 |
st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
|
| 166 |
' other aspects of your model\'s carbon footprint based on the LCA methodology.')
|
|
|
|
| 167 |
st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
|
|
|
|
| 10 |
|
| 11 |
api = HfApi()
|
| 12 |
|
| 13 |
+
def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
|
| 14 |
with open(CACHED_FILE_PATH,'a', newline='') as f:
|
| 15 |
writer = csv.writer(f)
|
| 16 |
+
writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
|
| 17 |
api.upload_file(
|
| 18 |
path_or_fileobj=CACHED_FILE_PATH,
|
| 19 |
path_in_repo="co2_emissions.csv",
|
|
|
|
| 48 |
providers = [p.upper() for p in instances['provider'].unique().tolist()]
|
| 49 |
providers.append('Local/Private Infastructure')
|
| 50 |
|
| 51 |
+
### Default values
|
| 52 |
+
hardware = "N/A"
|
| 53 |
+
gpu_tdp = 0
|
| 54 |
+
num_gpus = 0
|
| 55 |
+
training_time = 0.0
|
| 56 |
+
provider = "N/A"
|
| 57 |
+
carbon_intensity = 0.0
|
| 58 |
+
dynamic_emissions = 0.0
|
| 59 |
+
experimentation_time = 0.0
|
| 60 |
+
experimental_emissions = 0.0
|
| 61 |
+
pue = 1.0
|
| 62 |
+
pue_emissions = 0.0
|
| 63 |
+
embodied_type = 0.0
|
| 64 |
+
embodied_emissions = 0.0
|
| 65 |
+
model_info = "N/A"
|
| 66 |
+
|
| 67 |
+
### Conversion factors
|
| 68 |
kg_per_mile = 0.348
|
| 69 |
+
embodied_conversion_factor = 0.0289
|
| 70 |
|
| 71 |
electricity = pd.read_csv(electricity_url)
|
| 72 |
servers = pd.read_csv(server_url)
|
|
|
|
| 73 |
embodied_gpu = pd.read_csv(embodied_gpu_url)
|
|
|
|
|
|
|
| 74 |
#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
|
| 75 |
st.title("AI Carbon Calculator")
|
| 76 |
|
| 77 |
st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ๐๐ฅ๏ธ๐')
|
| 78 |
+
st.markdown('### Calculating the carbon footprint of AI models can be hard... this tool is here to help!')
|
| 79 |
+
# Adjacent string literals are concatenated verbatim, so the separating
# space has to live inside one of them (otherwise: "footprintand").
st.markdown('##### Use the calculators below to calculate different aspects of your model\'s carbon footprint'
            ' and don\'t forget to share your data to help the community better understand the carbon emissions of AI!')
|
|
|
|
| 81 |
|
| 82 |
st.markdown('### Dynamic Emissions ๐')
|
| 83 |
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
|
| 84 |
with st.expander("Calculate the dynamic emissions of your model"):
|
| 85 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
| 86 |
with col1:
|
| 87 |
+
hardware = st.selectbox('Hardware used', TDP['name'].tolist())
|
| 88 |
gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
|
| 89 |
+
st.markdown("Different hardware has different TDP (Thermal Design Power), which impacts how much energy you use.")
|
| 90 |
with col2:
|
| 91 |
+
num_gpus = st.number_input('Number of GPUs/CPUs/TPUs used', value = 16)
|
| 92 |
+
#st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
|
| 93 |
+
# 'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
|
| 94 |
with col3:
|
| 95 |
+
training_time = st.number_input('Total training time (in hours)', value = 0.0)
|
| 96 |
+
with col4:
|
| 97 |
provider = st.selectbox('Provider used', providers)
|
| 98 |
st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
|
| 99 |
+
with col5:
|
| 100 |
if provider != 'Local/Private Infastructure':
|
| 101 |
provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
|
| 102 |
# This dropdown picks a region of the already-chosen provider, so label it as such
# rather than repeating the provider selectbox's label.
region = st.selectbox('Region used', provider_instances)
|
|
|
|
| 106 |
carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
|
| 107 |
st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
|
| 108 |
' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
|
| 109 |
+
dynamic_emissions = round(gpu_tdp * num_gpus*training_time * carbon_intensity/1000000)
|
| 110 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
| 111 |
st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
|
| 112 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
|
|
|
| 122 |
experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
|
| 123 |
# Show the experimental_emissions value (derived from the experimentation time)
# instead of a hard-coded 0.0.
st.metric(label="Experimental emissions", value=str(experimental_emissions)+' kilograms of CO2eq')
|
| 124 |
|
| 125 |
+
st.markdown('### Datacenter (Overhead) Emissions ๐')
|
| 126 |
# Adjacent string literals are concatenated verbatim, so the separating
# space has to live inside one of them (otherwise: "infrastructureused").
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
            ' used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
| 128 |
with st.expander("Calculate the idle emissions of your model"):
|
|
|
|
| 150 |
|
| 151 |
else:
|
| 152 |
st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
|
| 153 |
+
# PUE (Power Usage Effectiveness) is a ratio that cannot go below 1.0, so give the
# slider an explicit float range instead of relying on Streamlit's default bounds,
# and label it with the quantity it actually sets. The 3.0 upper bound is a
# pragmatic cap -- NOTE(review): widen it if less efficient sites must be covered.
pue = st.slider('PUE of your infrastructure', min_value=1.0, max_value=3.0, value=1.58, step=0.01)
|
| 154 |
else:
|
| 155 |
st.markdown('##### The PUE of the datacenter you used is: ')
|
| 156 |
st.markdown('#### '+ str(pue))
|
|
|
|
| 161 |
st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
|
| 162 |
' the computing equipment needed for AI models.')
|
| 163 |
with st.expander("Calculate the embodied emissions of your model"):
|
| 164 |
+
st.markdown('These are the trickiest emissions to track down since a lot of the information needed is missing.')
|
| 165 |
+
st.markdown('##### Based on the number of GPUs and training time you indicated above, we can estimate that your model\'s embodied emissions are approximately: ')
|
| 166 |
+
hardware_type = TDP['type'][TDP['name'] == hardware].tolist()[0]
|
| 167 |
+
if hardware_type == 'cpu':
|
| 168 |
+
embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio']=='Manufacturing emissions per additional CPU (kgCOโeq)'].tolist()[0]
|
| 169 |
+
elif hardware_type == 'gpu' or hardware_type == 'tpu':
|
| 170 |
+
embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio']=='Manufacturing emissions per additionnal GPU Card (kgCOโeq)'].tolist()[0]
|
| 171 |
+
embodied_emissions = round(int(embodied_type)*embodied_conversion_factor*num_gpus*training_time/1000,1)
|
| 172 |
+
st.metric(label="Embodied emissions", value=str(embodied_emissions)+' kilograms of CO2eq')
|
| 173 |
+
st.markdown('This is a high-level estimate based on an hourly manufacturing emissions conversion factor (linearly ammortised) of 0.0289 [(source)](https://docs.google.com/spreadsheets/d/1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k/).')
|
| 174 |
+
|
| 175 |
+
st.markdown('### Model Information โน๏ธ')
|
| 176 |
+
st.markdown('##### If you want to share the link to your model code or paper, please do so below! Otherwise, your submission will be anonymous.')
|
| 177 |
+
model_info = st.text_input(label= "Enter a link to your model (optional)")
|
| 178 |
|
| 179 |
m = st.markdown("""
|
| 180 |
<style>
|
| 181 |
div.stButton > button:first-child {
|
| 182 |
background-color: rgb(80, 200, 120);
|
| 183 |
background-image: none;
|
| 184 |
+
font-size: 25px;
|
| 185 |
height: 3em;
|
| 186 |
+
width: 15em;
|
| 187 |
}
|
| 188 |
</style>""", unsafe_allow_html=True)
|
| 189 |
+
buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
|
| 190 |
+
with buttoncol2:
|
| 191 |
+
st.button(label="Share my CO2 data!", on_click = lambda *args: write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info))
|
| 192 |
|
| 193 |
st.markdown('### Methodology')
|
| 194 |
with st.expander("More information about our Methodology"):
|
| 195 |
st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
|
| 196 |
' other aspects of your model\'s carbon footprint based on the LCA methodology.')
|
| 197 |
+
# Keep this a single literal: an extra adjacent ')' literal would be concatenated
# onto the text and rendered as a stray parenthesis after the preprint link.
st.markdown('We considered all of these aspects when calculating the CO2 emissions of BLOOM ๐ธ, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
|
| 198 |
st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
|