Spaces:
Sleeping
Sleeping
rmm
committed on
Commit
·
a1c0c4d
1
Parent(s):
cf8541f
fix: catch failing external resource: huggingface dataset
Browse files- this version includes a data mock, since downstream processing
assumes everything worked. one step at a time
- src/maps/obs_map.py +32 -1
src/maps/obs_map.py
CHANGED
|
@@ -3,6 +3,10 @@ import logging
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
from datasets import load_dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import streamlit as st
|
| 7 |
import folium
|
| 8 |
from streamlit_folium import st_folium
|
|
@@ -113,6 +117,33 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
|
|
| 113 |
#folium.LayerControl().add_to(m)
|
| 114 |
return m
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
@@ -139,7 +170,7 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
| 139 |
"""
|
| 140 |
|
| 141 |
# load/download data from huggingface dataset
|
| 142 |
-
metadata =
|
| 143 |
|
| 144 |
# make a pandas df that is compliant with folium/streamlit maps
|
| 145 |
_df = pd.DataFrame({
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
from datasets import load_dataset
|
| 6 |
+
from datasets import DatasetDict, Dataset
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
import streamlit as st
|
| 11 |
import folium
|
| 12 |
from streamlit_folium import st_folium
|
|
|
|
| 117 |
#folium.LayerControl().add_to(m)
|
| 118 |
return m
|
| 119 |
|
| 120 |
+
def try_download_dataset(dataset_id:str, data_files:str, mockdata_on_failure:bool=False) -> dict:
    """Download a Hugging Face dataset, optionally falling back to mock data.

    Calls ``load_dataset(dataset_id, data_files=data_files)`` and reports the
    outcome (including elapsed time) to both the module logger and the
    Streamlit page.

    Args:
        dataset_id: Identifier of the dataset, passed to ``load_dataset``.
        data_files: Data file selector, passed to ``load_dataset``.
        mockdata_on_failure: If True and the download raises, return a
            minimal single-row stand-in with a ``'train'`` split containing
            ``latitude``, ``longitude`` and ``predicted_class`` so that
            downstream processing can continue. If False, return an empty
            dict on failure.

    Returns:
        The object returned by ``load_dataset`` on success (annotated as a
        ``DatasetDict`` by the original author), otherwise the mock dict or
        an empty dict as described above.
    """
    # perf_counter is monotonic, so the elapsed time cannot go negative or
    # jump if the system clock is adjusted during a slow download.
    t1 = time.perf_counter()
    try:
        m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary around an
        # external resource, and any failure should degrade gracefully.
        elap = time.perf_counter() - t1
        # Only claim a mock-data fallback when one is actually being used.
        if mockdata_on_failure:
            fallback = "Using mock data to continue"
        else:
            fallback = "No data available"
        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s) {fallback}"
        st.error(msg)
        m_logger.error(msg)
        if mockdata_on_failure:
            metadata = {'train':
                        {'latitude': [0],
                         'longitude': [0],
                         'predicted_class': ['rough_toothed_dolphin']}
                        }
        else:
            metadata = {}
    else:
        # Success path only: previously this message was also emitted right
        # after the error message when the download had failed.
        elap = time.perf_counter() - t1
        msg = f"Downloaded dataset: (after {elap:.2f}s). "
        m_logger.info(msg)
        st.write(msg)

    return metadata
|
| 147 |
|
| 148 |
|
| 149 |
def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
|
|
| 170 |
"""
|
| 171 |
|
| 172 |
# load/download data from huggingface dataset
|
| 173 |
+
metadata = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
|
| 174 |
|
| 175 |
# make a pandas df that is compliant with folium/streamlit maps
|
| 176 |
_df = pd.DataFrame({
|