rmm committed on
Commit
882be26
·
1 Parent(s): 3e2cb2f

feat: push observations functions for multi-file handling

Browse files

- The current implementation opens the HF API handle once, then prepares and
pushes each observation individually. TODO: check the docs about pushing
multiple observations in a single transaction.

- At present the `api.upload_file` call is commented out, so the new functions
only emit log/visual info about the actions they would take.

Files changed (2) hide show
  1. src/hf_push_observations.py +72 -5
  2. src/main.py +6 -0
src/hf_push_observations.py CHANGED
@@ -1,15 +1,83 @@
1
- from streamlit.delta_generator import DeltaGenerator
2
- import streamlit as st
3
- from huggingface_hub import HfApi
4
  import json
5
  import tempfile
6
  import logging
7
 
 
 
 
 
 
8
  # get a global var for logger accessor in this module
9
  LOG_LEVEL = logging.DEBUG
10
  g_logger = logging.getLogger(__name__)
11
  g_logger.setLevel(LOG_LEVEL)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def push_observations(tab_log:DeltaGenerator=None):
14
  """
15
  Push the observations to the Hugging Face dataset
@@ -30,7 +98,6 @@ def push_observations(tab_log:DeltaGenerator=None):
30
  tab_log.info(f"Uploading observations: {metadata_str}")
31
 
32
  # get huggingface api
33
- import os
34
  token = os.environ.get("HF_TOKEN", None)
35
  api = HfApi(token=token)
36
 
@@ -53,4 +120,4 @@ def push_observations(tab_log:DeltaGenerator=None):
53
  # msg = f"observation attempted tx to repo happy walrus: {rv}"
54
  g_logger.info(msg)
55
  st.info(msg)
56
-
 
1
+ import os
 
 
2
  import json
3
  import tempfile
4
  import logging
5
 
6
+ from streamlit.delta_generator import DeltaGenerator
7
+ import streamlit as st
8
+ from huggingface_hub import HfApi, CommitInfo
9
+
10
+
11
  # get a global var for logger accessor in this module
12
  LOG_LEVEL = logging.DEBUG
13
  g_logger = logging.getLogger(__name__)
14
  g_logger.setLevel(LOG_LEVEL)
15
 
16
def push_observation(image_hash:str, api:HfApi) -> "CommitInfo | None":
    """
    Push one observation to the Hugging Face dataset.

    The observation stored under `image_hash` in
    `st.session_state.public_observations` is serialised to JSON and written
    to a temporary file. The `api.upload_file` call is currently disabled, so
    for now this only reports (logger + streamlit widgets) what would be sent.

    Args:
        image_hash: key of the observation in
            `st.session_state.public_observations`.
        api: Hugging Face API handle; unused while the upload is disabled.

    Returns:
        The result of the upload. Always None at present (upload disabled);
        also None when no observation is stored under `image_hash`.

    Raises:
        KeyError: if the observation lacks 'author_email' or 'image_md5'.
    """
    # get the observation
    observation = st.session_state.public_observations.get(image_hash)
    if observation is None:
        msg = f"Could not find observation with hash {image_hash}"
        g_logger.error(msg)
        st.error(msg)
        return None

    # convert to json
    # TODO: flagged as "doesn't work yet" -- presumably the observation is
    # not directly JSON-serialisable; confirm and add a proper encoder
    metadata_str = json.dumps(observation)

    st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
    # .get() so a session without a log tab is tolerated, matching the
    # None check below (attribute access would raise instead)
    tab_log = st.session_state.get("tab_log")
    if tab_log is not None:
        tab_log.info(f"Uploading observation: {metadata_str}")

    # build the destination before touching the filesystem, so a missing key
    # fails before any temp file exists
    path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"

    # write to a temp file so we can send it; delete=False keeps the file on
    # disk after the `with` closes it, so it can be re-opened by name
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    ) as f:
        f.write(metadata_str)

    try:
        st.info(f"temp file: {f.name} with metadata written...")

        msg = f"fname: {f.name} | path: {path_in_repo}"
        st.warning(msg)
        rv = None # temp don't send anything
        # rv = api.upload_file(
        #     path_or_fileobj=f.name,
        #     path_in_repo=path_in_repo,
        #     repo_id="Saving-Willy/temp_dataset",
        #     repo_type="dataset",
        # )
        # msg = f"observation attempted tx to repo happy walrus: {rv}"
        g_logger.info(msg)
        st.info(msg)
    finally:
        # the file is only needed while the upload runs; remove it so
        # repeated pushes do not accumulate temp files
        os.unlink(f.name)

    return rv
63
+
64
+
65
+
66
def push_all_observations():
    """
    Open an API connection to Hugging Face, and push all observations one by one.

    The token is read from the HF_TOKEN environment variable (None when it is
    unset). A single `HfApi` handle is created and passed to
    `push_observation` for every key in
    `st.session_state.public_observations`. Nothing is returned.
    """

    # get huggingface api
    token = os.environ.get("HF_TOKEN", None)
    api = HfApi(token=token)

    # iterate over the observations; the dict keys are the image hashes
    for image_hash in st.session_state.public_observations:
        push_observation(image_hash, api)
78
+
79
+
80
+
81
  def push_observations(tab_log:DeltaGenerator=None):
82
  """
83
  Push the observations to the Hugging Face dataset
 
98
  tab_log.info(f"Uploading observations: {metadata_str}")
99
 
100
  # get huggingface api
 
101
  token = os.environ.get("HF_TOKEN", None)
102
  api = HfApi(token=token)
103
 
 
120
  # msg = f"observation attempted tx to repo happy walrus: {rv}"
121
  g_logger.info(msg)
122
  st.info(msg)
123
+
src/main.py CHANGED
@@ -24,6 +24,9 @@ from maps.obs_map import present_obs_map
24
  from utils.st_logs import setup_logging, parse_log_buffer
25
  from utils.workflow_state import WorkflowFSM, FSM_STATES
26
  from utils.workflow_ui import refresh_progress, init_workflow_viz
 
 
 
27
  #from classifier.classifier_image import cetacean_classify
28
  from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results
29
 
@@ -287,6 +290,7 @@ def main() -> None:
287
  if st.button("mock: manual validation done."):
288
  st.session_state.workflow_fsm.complete_current_state()
289
  # -> manual_inspection_completed
 
290
 
291
  cetacean_show_results_and_review()
292
 
@@ -299,6 +303,8 @@ def main() -> None:
299
 
300
 
301
  if st.button("(nooop) Upload observation to THE INTERNET!"):
 
 
302
  st.session_state.workflow_fsm.complete_current_state()
303
  # -> data_uploaded
304
 
 
24
  from utils.st_logs import setup_logging, parse_log_buffer
25
  from utils.workflow_state import WorkflowFSM, FSM_STATES
26
  from utils.workflow_ui import refresh_progress, init_workflow_viz
27
+ from hf_push_observations import push_all_observations
28
+
29
+
30
  #from classifier.classifier_image import cetacean_classify
31
  from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results
32
 
 
290
  if st.button("mock: manual validation done."):
291
  st.session_state.workflow_fsm.complete_current_state()
292
  # -> manual_inspection_completed
293
+ st.rerun()
294
 
295
  cetacean_show_results_and_review()
296
 
 
303
 
304
 
305
  if st.button("(nooop) Upload observation to THE INTERNET!"):
306
+ # let this go through to the push_all func, since it just reports to log for now.
307
+ push_all_observations()
308
  st.session_state.workflow_fsm.complete_current_state()
309
  # -> data_uploaded
310