sergiopaniego (HF Staff) committed · commit ee93e86 · verified · 1 Parent(s): 9db8853

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/trackio_logo_old.png filter=lfs diff=lfs merge=lfs -text
__init__.py ADDED
@@ -0,0 +1,203 @@
1
+ import hashlib
2
+ import os
3
+ import warnings
4
+ import webbrowser
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from gradio.blocks import BUILT_IN_THEMES
9
+ from gradio.themes import Default as DefaultTheme
10
+ from gradio.themes import ThemeClass
11
+ from gradio_client import Client
12
+
13
+ from trackio import context_vars, deploy, utils
14
+ from trackio.imports import import_csv, import_tf_events
15
+ from trackio.media import TrackioImage
16
+ from trackio.run import Run
17
+ from trackio.sqlite_storage import SQLiteStorage
18
+ from trackio.ui import demo
19
+ from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_DIR
20
+
21
+ __version__ = Path(__file__).parent.joinpath("version.txt").read_text().strip()
22
+
23
+ __all__ = ["init", "log", "finish", "show", "import_csv", "import_tf_events", "Image"]
24
+
25
+ Image = TrackioImage
26
+
27
+
28
+ config = {}
29
+
30
+ DEFAULT_THEME = "citrus"
31
+
32
+
33
+ def init(
34
+ project: str,
35
+ name: str | None = None,
36
+ space_id: str | None = None,
37
+ dataset_id: str | None = None,
38
+ config: dict | None = None,
39
+ resume: str = "never",
40
+ settings: Any = None,
41
+ ) -> Run:
42
+ """
43
+ Creates a new Trackio project and returns a Run object.
44
+
45
+ Args:
46
+ project: The name of the project (can be an existing project to continue tracking or a new project to start tracking from scratch).
47
+ name: The name of the run (if not provided, a default name will be generated).
48
+ space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame" or "orgname/reponame", or just "reponame" in which case the Space will be created in the currently-logged-in Hugging Face user's namespace. If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
49
+ dataset_id: If a space_id is provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Specify a Dataset with name like "username/datasetname" or "orgname/datasetname", or "datasetname" (uses currently-logged-in Hugging Face user's namespace), or None (uses the same name as the Space but with the "_dataset" suffix). If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it.
50
+ config: A dictionary of configuration options. Provided for compatibility with wandb.init()
51
+ resume: Controls how to handle resuming a run. Can be one of:
52
+ - "must": Must resume the run with the given name, raises error if run doesn't exist
53
+ - "allow": Resume the run if it exists, otherwise create a new run
54
+ - "never": Never resume a run, always create a new one
55
+ settings: Not used. Provided for compatibility with wandb.init()
56
+ """
57
+ if settings is not None:
58
+ warnings.warn(
59
+ "* Warning: settings is not used. Provided for compatibility with wandb.init(). Please create an issue at: https://github.com/gradio-app/trackio/issues if you need a specific feature implemented."
60
+ )
61
+
62
+ if space_id is None and dataset_id is not None:
63
+ raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
64
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
65
+ url = context_vars.current_server.get()
66
+
67
+ if url is None:
68
+ if space_id is None:
69
+ _, url, _ = demo.launch(
70
+ show_api=False,
71
+ inline=False,
72
+ quiet=True,
73
+ prevent_thread_lock=True,
74
+ show_error=True,
75
+ )
76
+ else:
77
+ url = space_id
78
+ context_vars.current_server.set(url)
79
+
80
+ if (
81
+ context_vars.current_project.get() is None
82
+ or context_vars.current_project.get() != project
83
+ ):
84
+ print(f"* Trackio project initialized: {project}")
85
+
86
+ if dataset_id is not None:
87
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
88
+ print(
89
+ f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
90
+ )
91
+ if space_id is None:
92
+ print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
93
+ utils.print_dashboard_instructions(project)
94
+ else:
95
+ deploy.create_space_if_not_exists(space_id, dataset_id)
96
+ print(
97
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
98
+ )
99
+ context_vars.current_project.set(project)
100
+
101
+ client = None
102
+ if not space_id:
103
+ client = Client(url, verbose=False)
104
+
105
+ if resume == "must":
106
+ if name is None:
107
+ raise ValueError("Must provide a run name when resume='must'")
108
+ if name not in SQLiteStorage.get_runs(project):
109
+ raise ValueError(f"Run '{name}' does not exist in project '{project}'")
110
+ elif resume == "allow":
111
+ if name is not None and name in SQLiteStorage.get_runs(project):
112
+ print(f"* Resuming existing run: {name}")
113
+ elif resume == "never":
114
+ if name is not None and name in SQLiteStorage.get_runs(project):
115
+ name = None
116
+ else:
117
+ raise ValueError("resume must be one of: 'must', 'allow', or 'never'")
118
+
119
+ run = Run(
120
+ url=url,
121
+ project=project,
122
+ client=client,
123
+ name=name,
124
+ config=config,
125
+ space_id=space_id,
126
+ )
127
+ context_vars.current_run.set(run)
128
+ globals()["config"] = run.config
129
+ return run
130
+
131
+
132
+ def log(metrics: dict, step: int | None = None) -> None:
133
+ """
134
+ Logs metrics to the current run.
135
+
136
+ Args:
137
+ metrics: A dictionary of metrics to log.
138
+ step: The step number. If not provided, the step will be incremented automatically.
139
+ """
140
+ run = context_vars.current_run.get()
141
+ if run is None:
142
+ raise RuntimeError("Call trackio.init() before trackio.log().")
143
+ run.log(
144
+ metrics=metrics,
145
+ step=step,
146
+ )
147
+
148
+
149
+ def finish():
150
+ """
151
+ Finishes the current run.
152
+ """
153
+ run = context_vars.current_run.get()
154
+ if run is None:
155
+ raise RuntimeError("Call trackio.init() before trackio.finish().")
156
+ run.finish()
157
+
158
+
159
+ def show(project: str | None = None, theme: str | ThemeClass = DEFAULT_THEME):
160
+ """
161
+ Launches the Trackio dashboard.
162
+
163
+ Args:
164
+ project: The name of the project whose runs to show. If not provided, all projects will be shown and the user can select one.
165
+ theme: The Gradio theme to use. Can be any built-in Gradio theme (e.g. "citrus", "soft", "default"), theme from the Hub (https://huggingface.co/spaces/gradio/theme-gallery), or a custom gradio.themes.ThemeClass
166
+ """
167
+ if theme != DEFAULT_THEME:
168
+ # TODO: It's a little hacky to reproduce this theme-setting logic from Gradio Blocks,
169
+ # but in Gradio 6.0, the theme will be set in `launch()` instead, which means that we
170
+ # will be able to remove this code.
171
+ if isinstance(theme, str):
172
+ if theme.lower() in BUILT_IN_THEMES:
173
+ theme = BUILT_IN_THEMES[theme.lower()]
174
+ else:
175
+ try:
176
+ theme = ThemeClass.from_hub(theme)
177
+ except Exception as e:
178
+ warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
179
+ theme = DefaultTheme()
180
+ if not isinstance(theme, ThemeClass):
181
+ warnings.warn("Theme should be a class loaded from gradio.themes")
182
+ theme = DefaultTheme()
183
+ demo.theme: ThemeClass = theme
184
+ demo.theme_css = theme._get_theme_css()
185
+ demo.stylesheets = theme._stylesheets
186
+ theme_hasher = hashlib.sha256()
187
+ theme_hasher.update(demo.theme_css.encode("utf-8"))
188
+ demo.theme_hash = theme_hasher.hexdigest()
189
+
190
+ _, url, share_url = demo.launch(
191
+ show_api=False,
192
+ quiet=True,
193
+ inline=False,
194
+ prevent_thread_lock=True,
195
+ favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
196
+ allowed_paths=[TRACKIO_LOGO_DIR],
197
+ )
198
+
199
+ base_url = share_url + "/" if share_url else url
200
+ dashboard_url = base_url + f"?project={project}" if project else base_url
201
+ print(f"* Trackio UI launched at: {dashboard_url}")
202
+ webbrowser.open(dashboard_url)
203
+ utils.block_except_in_notebook()
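
A minimal usage sketch of the public API defined above (`init`, `log`, `finish`); the project name, metric names, and values are illustrative:

```py
import trackio

run = trackio.init(project="my-project", config={"lr": 1e-3})
for step in range(3):
    # metrics are queued and sent to the dashboard by the Run's background thread
    trackio.log({"loss": 1.0 / (step + 1)}, step=step)
trackio.finish()
```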
__pycache__/__init__.cpython-312.pyc ADDED
Binary file (9.94 kB).
 
__pycache__/commit_scheduler.cpython-312.pyc ADDED
Binary file (18.9 kB).
 
__pycache__/context_vars.cpython-312.pyc ADDED
Binary file (820 Bytes).
 
__pycache__/deploy.cpython-312.pyc ADDED
Binary file (6.8 kB).
 
__pycache__/dummy_commit_scheduler.cpython-312.pyc ADDED
Binary file (1.07 kB).
 
__pycache__/file_storage.cpython-312.pyc ADDED
Binary file (2.83 kB).
 
__pycache__/imports.cpython-312.pyc ADDED
Binary file (11.8 kB).
 
__pycache__/media.cpython-312.pyc ADDED
Binary file (5.84 kB).
 
__pycache__/run.cpython-312.pyc ADDED
Binary file (6.94 kB).
 
__pycache__/sqlite_storage.cpython-312.pyc ADDED
Binary file (18.7 kB).
 
__pycache__/typehints.cpython-312.pyc ADDED
Binary file (912 Bytes).
 
__pycache__/ui.cpython-312.pyc ADDED
Binary file (28.2 kB).
 
__pycache__/utils.cpython-312.pyc ADDED
Binary file (15.4 kB).
 
assets/trackio_logo_dark.png ADDED
assets/trackio_logo_light.png ADDED
assets/trackio_logo_old.png ADDED

Git LFS Details

  • SHA256: 3922c4d1e465270ad4d8abb12023f3beed5d9f7f338528a4c0ac21dcf358a1c8
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
assets/trackio_logo_type_dark.png ADDED
assets/trackio_logo_type_dark_transparent.png ADDED
assets/trackio_logo_type_light.png ADDED
assets/trackio_logo_type_light_transparent.png ADDED
cli.py ADDED
@@ -0,0 +1,32 @@
+ import argparse
+
+ from trackio import show
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Trackio CLI")
+     subparsers = parser.add_subparsers(dest="command")
+
+     ui_parser = subparsers.add_parser(
+         "show", help="Show the Trackio dashboard UI for a project"
+     )
+     ui_parser.add_argument(
+         "--project", required=False, help="Project name to show in the dashboard"
+     )
+     ui_parser.add_argument(
+         "--theme",
+         required=False,
+         default="citrus",
+         help="A Gradio Theme to use for the dashboard instead of the default 'citrus', can be a built-in theme (e.g. 'soft', 'default'), a theme from the Hub (e.g. 'gstaff/xkcd').",
+     )
+
+     args = parser.parse_args()
+
+     if args.command == "show":
+         show(args.project, args.theme)
+     else:
+         parser.print_help()
+
+
+ if __name__ == "__main__":
+     main()
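
For reference, a sketch of invoking this entry point programmatically; it assumes the installed console script is named `trackio` and maps to `cli.main` (the script name is an assumption, not stated in this commit):

```py
import sys
from trackio.cli import main

# equivalent to running: trackio show --project my-project --theme soft
sys.argv = ["trackio", "show", "--project", "my-project", "--theme", "soft"]
main()
```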
commit_scheduler.py ADDED
@@ -0,0 +1,391 @@
1
+ # Originally copied from https://github.com/huggingface/huggingface_hub/blob/d0a948fc2a32ed6e557042a95ef3e4af97ec4a7c/src/huggingface_hub/_commit_scheduler.py
2
+
3
+ import atexit
4
+ import logging
5
+ import os
6
+ import time
7
+ from concurrent.futures import Future
8
+ from dataclasses import dataclass
9
+ from io import SEEK_END, SEEK_SET, BytesIO
10
+ from pathlib import Path
11
+ from threading import Lock, Thread
12
+ from typing import Callable, Dict, List, Optional, Union
13
+
14
+ from huggingface_hub.hf_api import (
15
+ DEFAULT_IGNORE_PATTERNS,
16
+ CommitInfo,
17
+ CommitOperationAdd,
18
+ HfApi,
19
+ )
20
+ from huggingface_hub.utils import filter_repo_objects
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class _FileToUpload:
27
+ """Temporary dataclass to store info about files to upload. Not meant to be used directly."""
28
+
29
+ local_path: Path
30
+ path_in_repo: str
31
+ size_limit: int
32
+ last_modified: float
33
+
34
+
35
+ class CommitScheduler:
36
+ """
37
+ Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
38
+
39
+ The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
40
+ properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
41
+ with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
42
+ to learn more about how to use it.
43
+
44
+ Args:
45
+ repo_id (`str`):
46
+ The id of the repo to commit to.
47
+ folder_path (`str` or `Path`):
48
+ Path to the local folder to upload regularly.
49
+ every (`int` or `float`, *optional*):
50
+ The number of minutes between each commit. Defaults to 5 minutes.
51
+ path_in_repo (`str`, *optional*):
52
+ Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
53
+ of the repository.
54
+ repo_type (`str`, *optional*):
55
+ The type of the repo to commit to. Defaults to `model`.
56
+ revision (`str`, *optional*):
57
+ The revision of the repo to commit to. Defaults to `main`.
58
+ private (`bool`, *optional*):
59
+ Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
60
+ token (`str`, *optional*):
61
+ The token to use to commit to the repo. Defaults to the token saved on the machine.
62
+ allow_patterns (`List[str]` or `str`, *optional*):
63
+ If provided, only files matching at least one pattern are uploaded.
64
+ ignore_patterns (`List[str]` or `str`, *optional*):
65
+ If provided, files matching any of the patterns are not uploaded.
66
+ squash_history (`bool`, *optional*):
67
+ Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
68
+ useful to avoid degraded performances on the repo when it grows too large.
69
+ hf_api (`HfApi`, *optional*):
70
+ The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
71
+ on_before_commit (`Callable[[], None]`, *optional*):
72
+ If specified, a function that will be called before the CommitScheduler lists files to create a commit.
73
+
74
+ Example:
75
+ ```py
76
+ >>> from pathlib import Path
77
+ >>> from huggingface_hub import CommitScheduler
78
+
79
+ # Scheduler uploads every 10 minutes
80
+ >>> csv_path = Path("watched_folder/data.csv")
81
+ >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
82
+
83
+ >>> with csv_path.open("a") as f:
84
+ ... f.write("first line")
85
+
86
+ # Some time later (...)
87
+ >>> with csv_path.open("a") as f:
88
+ ... f.write("second line")
89
+ ```
90
+
91
+ Example using a context manager:
92
+ ```py
93
+ >>> from pathlib import Path
94
+ >>> from huggingface_hub import CommitScheduler
95
+
96
+ >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
97
+ ... csv_path = Path("watched_folder/data.csv")
98
+ ... with csv_path.open("a") as f:
99
+ ... f.write("first line")
100
+ ... (...)
101
+ ... with csv_path.open("a") as f:
102
+ ... f.write("second line")
103
+
104
+ # Scheduler is now stopped and last commit have been triggered
105
+ ```
106
+ """
107
+
108
+ def __init__(
109
+ self,
110
+ *,
111
+ repo_id: str,
112
+ folder_path: Union[str, Path],
113
+ every: Union[int, float] = 5,
114
+ path_in_repo: Optional[str] = None,
115
+ repo_type: Optional[str] = None,
116
+ revision: Optional[str] = None,
117
+ private: Optional[bool] = None,
118
+ token: Optional[str] = None,
119
+ allow_patterns: Optional[Union[List[str], str]] = None,
120
+ ignore_patterns: Optional[Union[List[str], str]] = None,
121
+ squash_history: bool = False,
122
+ hf_api: Optional["HfApi"] = None,
123
+ on_before_commit: Optional[Callable[[], None]] = None,
124
+ ) -> None:
125
+ self.api = hf_api or HfApi(token=token)
126
+ self.on_before_commit = on_before_commit
127
+
128
+ # Folder
129
+ self.folder_path = Path(folder_path).expanduser().resolve()
130
+ self.path_in_repo = path_in_repo or ""
131
+ self.allow_patterns = allow_patterns
132
+
133
+ if ignore_patterns is None:
134
+ ignore_patterns = []
135
+ elif isinstance(ignore_patterns, str):
136
+ ignore_patterns = [ignore_patterns]
137
+ self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
138
+
139
+ if self.folder_path.is_file():
140
+ raise ValueError(
141
+ f"'folder_path' must be a directory, not a file: '{self.folder_path}'."
142
+ )
143
+ self.folder_path.mkdir(parents=True, exist_ok=True)
144
+
145
+ # Repository
146
+ repo_url = self.api.create_repo(
147
+ repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True
148
+ )
149
+ self.repo_id = repo_url.repo_id
150
+ self.repo_type = repo_type
151
+ self.revision = revision
152
+ self.token = token
153
+
154
+ self.last_uploaded: Dict[Path, float] = {}
155
+ self.last_push_time: float | None = None
156
+
157
+ if not every > 0:
158
+ raise ValueError(f"'every' must be a positive integer, not '{every}'.")
159
+ self.lock = Lock()
160
+ self.every = every
161
+ self.squash_history = squash_history
162
+
163
+ logger.info(
164
+ f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes."
165
+ )
166
+ self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
167
+ self._scheduler_thread.start()
168
+ atexit.register(self._push_to_hub)
169
+
170
+ self.__stopped = False
171
+
172
+ def stop(self) -> None:
173
+ """Stop the scheduler.
174
+
175
+ A stopped scheduler cannot be restarted. Mostly for tests purposes.
176
+ """
177
+ self.__stopped = True
178
+
179
+ def __enter__(self) -> "CommitScheduler":
180
+ return self
181
+
182
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
183
+ # Upload last changes before exiting
184
+ self.trigger().result()
185
+ self.stop()
186
+ return
187
+
188
+ def _run_scheduler(self) -> None:
189
+ """Dumb thread waiting between each scheduled push to Hub."""
190
+ while True:
191
+ self.last_future = self.trigger()
192
+ time.sleep(self.every * 60)
193
+ if self.__stopped:
194
+ break
195
+
196
+ def trigger(self) -> Future:
197
+ """Trigger a `push_to_hub` and return a future.
198
+
199
+ This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
200
+ immediately, without waiting for the next scheduled commit.
201
+ """
202
+ return self.api.run_as_future(self._push_to_hub)
203
+
204
+ def _push_to_hub(self) -> Optional[CommitInfo]:
205
+ if self.__stopped: # If stopped, already scheduled commits are ignored
206
+ return None
207
+
208
+ logger.info("(Background) scheduled commit triggered.")
209
+ try:
210
+ value = self.push_to_hub()
211
+ if self.squash_history:
212
+ logger.info("(Background) squashing repo history.")
213
+ self.api.super_squash_history(
214
+ repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision
215
+ )
216
+ return value
217
+ except Exception as e:
218
+ logger.error(
219
+ f"Error while pushing to Hub: {e}"
220
+ ) # Depending on the setup, error might be silenced
221
+ raise
222
+
223
+ def push_to_hub(self) -> Optional[CommitInfo]:
224
+ """
225
+ Push folder to the Hub and return the commit info.
226
+
227
+ <Tip warning={true}>
228
+
229
+ This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
230
+ queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
231
+ issues.
232
+
233
+ </Tip>
234
+
235
+ The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
236
+ uploads only changed files. If no changes are found, the method returns without committing anything. If you want
237
+ to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
238
+ for example to compress data together in a single file before committing. For more details and examples, check
239
+ out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
240
+ """
241
+ # Check files to upload (with lock)
242
+ with self.lock:
243
+ if self.on_before_commit is not None:
244
+ self.on_before_commit()
245
+
246
+ logger.debug("Listing files to upload for scheduled commit.")
247
+
248
+ # List files from folder (taken from `_prepare_upload_folder_additions`)
249
+ relpath_to_abspath = {
250
+ path.relative_to(self.folder_path).as_posix(): path
251
+ for path in sorted(
252
+ self.folder_path.glob("**/*")
253
+ ) # sorted to be deterministic
254
+ if path.is_file()
255
+ }
256
+ prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
257
+
258
+ # Filter with pattern + filter out unchanged files + retrieve current file size
259
+ files_to_upload: List[_FileToUpload] = []
260
+ for relpath in filter_repo_objects(
261
+ relpath_to_abspath.keys(),
262
+ allow_patterns=self.allow_patterns,
263
+ ignore_patterns=self.ignore_patterns,
264
+ ):
265
+ local_path = relpath_to_abspath[relpath]
266
+ stat = local_path.stat()
267
+ if (
268
+ self.last_uploaded.get(local_path) is None
269
+ or self.last_uploaded[local_path] != stat.st_mtime
270
+ ):
271
+ files_to_upload.append(
272
+ _FileToUpload(
273
+ local_path=local_path,
274
+ path_in_repo=prefix + relpath,
275
+ size_limit=stat.st_size,
276
+ last_modified=stat.st_mtime,
277
+ )
278
+ )
279
+
280
+ # Return if nothing to upload
281
+ if len(files_to_upload) == 0:
282
+ logger.debug("Dropping schedule commit: no changed file to upload.")
283
+ return None
284
+
285
+ # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
286
+ logger.debug("Removing unchanged files since previous scheduled commit.")
287
+ add_operations = [
288
+ CommitOperationAdd(
289
+ # TODO: Cap the file to its current size, even if the user append data to it while a scheduled commit is happening
290
+ # (requires an upstream fix for XET-535: `hf_xet` should support `BinaryIO` for upload)
291
+ path_or_fileobj=file_to_upload.local_path,
292
+ path_in_repo=file_to_upload.path_in_repo,
293
+ )
294
+ for file_to_upload in files_to_upload
295
+ ]
296
+
297
+ # Upload files (append mode expected - no need for lock)
298
+ logger.debug("Uploading files for scheduled commit.")
299
+ commit_info = self.api.create_commit(
300
+ repo_id=self.repo_id,
301
+ repo_type=self.repo_type,
302
+ operations=add_operations,
303
+ commit_message="Scheduled Commit",
304
+ revision=self.revision,
305
+ )
306
+
307
+ for file in files_to_upload:
308
+ self.last_uploaded[file.local_path] = file.last_modified
309
+
310
+ self.last_push_time = time.time()
311
+
312
+ return commit_info
313
+
314
+
315
+ class PartialFileIO(BytesIO):
316
+ """A file-like object that reads only the first part of a file.
317
+
318
+ Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
319
+ file is uploaded (i.e. the part that was available when the filesystem was first scanned).
320
+
321
+ In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
322
+ disturbance for the user. The object is passed to `CommitOperationAdd`.
323
+
324
+ Only supports `read`, `tell` and `seek` methods.
325
+
326
+ Args:
327
+ file_path (`str` or `Path`):
328
+ Path to the file to read.
329
+ size_limit (`int`):
330
+ The maximum number of bytes to read from the file. If the file is larger than this, only the first part
331
+ will be read (and uploaded).
332
+ """
333
+
334
+ def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
335
+ self._file_path = Path(file_path)
336
+ self._file = self._file_path.open("rb")
337
+ self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
338
+
339
+ def __del__(self) -> None:
340
+ self._file.close()
341
+ return super().__del__()
342
+
343
+ def __repr__(self) -> str:
344
+ return (
345
+ f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
346
+ )
347
+
348
+ def __len__(self) -> int:
349
+ return self._size_limit
350
+
351
+ def __getattribute__(self, name: str):
352
+ if name.startswith("_") or name in (
353
+ "read",
354
+ "tell",
355
+ "seek",
356
+ ): # only 3 public methods supported
357
+ return super().__getattribute__(name)
358
+ raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
359
+
360
+ def tell(self) -> int:
361
+ """Return the current file position."""
362
+ return self._file.tell()
363
+
364
+ def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
365
+ """Change the stream position to the given offset.
366
+
367
+ Behavior is the same as a regular file, except that the position is capped to the size limit.
368
+ """
369
+ if __whence == SEEK_END:
370
+ # SEEK_END => set from the truncated end
371
+ __offset = len(self) + __offset
372
+ __whence = SEEK_SET
373
+
374
+ pos = self._file.seek(__offset, __whence)
375
+ if pos > self._size_limit:
376
+ return self._file.seek(self._size_limit)
377
+ return pos
378
+
379
+ def read(self, __size: Optional[int] = -1) -> bytes:
380
+ """Read at most `__size` bytes from the file.
381
+
382
+ Behavior is the same as a regular file, except that it is capped to the size limit.
383
+ """
384
+ current = self._file.tell()
385
+ if __size is None or __size < 0:
386
+ # Read until file limit
387
+ truncated_size = self._size_limit - current
388
+ else:
389
+ # Read until file limit or __size
390
+ truncated_size = min(__size, self._size_limit - current)
391
+ return self._file.read(truncated_size)
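
A sketch of the `on_before_commit` hook added on top of the upstream scheduler; the repo id and callback body are illustrative:

```py
from trackio.commit_scheduler import CommitScheduler

def flush_state():
    # illustrative: write any in-memory state into the watched folder
    print("preparing files for the next scheduled commit")

scheduler = CommitScheduler(
    repo_id="username/my-trackio-dataset",  # hypothetical dataset repo
    repo_type="dataset",
    folder_path="trackio_data",
    every=5,
    on_before_commit=flush_state,
)
```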
context_vars.py ADDED
@@ -0,0 +1,15 @@
+ import contextvars
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from trackio.run import Run
+
+ current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
+     "current_run", default=None
+ )
+ current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+     "current_project", default=None
+ )
+ current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+     "current_server", default=None
+ )
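
These context variables are what `trackio.init()` sets and `trackio.log()` reads (see `__init__.py` above); a small sketch of inspecting them:

```py
from trackio import context_vars

run = context_vars.current_run.get()          # set by trackio.init()
project = context_vars.current_project.get()  # active project name, or None
server = context_vars.current_server.get()    # local Gradio URL or Space id
if run is None:
    print("No active run: call trackio.init() first")
```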
deploy.py ADDED
@@ -0,0 +1,171 @@
1
+ import io
2
+ import os
3
+ import time
4
+ from importlib.resources import files
5
+ from pathlib import Path
6
+
7
+ import gradio
8
+ import huggingface_hub
9
+ from gradio_client import Client, handle_file
10
+ from httpx import ReadTimeout
11
+ from huggingface_hub.errors import RepositoryNotFoundError
12
+ from requests import HTTPError
13
+
14
+ from trackio.sqlite_storage import SQLiteStorage
15
+
16
+ SPACE_URL = "https://huggingface.co/spaces/{space_id}"
17
+ PERSISTENT_STORAGE_DIR = "/data/.huggingface/trackio"
18
+
19
+
20
+ def deploy_as_space(
21
+ space_id: str,
22
+ dataset_id: str | None = None,
23
+ ):
24
+ if (
25
+ os.getenv("SYSTEM") == "spaces"
26
+ ): # in case a repo with this function is uploaded to spaces
27
+ return
28
+
29
+ trackio_path = files("trackio")
30
+
31
+ hf_api = huggingface_hub.HfApi()
32
+
33
+ try:
34
+ huggingface_hub.create_repo(
35
+ space_id,
36
+ space_sdk="gradio",
37
+ repo_type="space",
38
+ exist_ok=True,
39
+ )
40
+ except HTTPError as e:
41
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
42
+ print("Need 'write' access token to create a Spaces repo.")
43
+ huggingface_hub.login(add_to_git_credential=False)
44
+ huggingface_hub.create_repo(
45
+ space_id,
46
+ space_sdk="gradio",
47
+ repo_type="space",
48
+ exist_ok=True,
49
+ )
50
+ else:
51
+ raise ValueError(f"Failed to create Space: {e}")
52
+
53
+ with open(Path(trackio_path, "README.md"), "r") as f:
54
+ readme_content = f.read()
55
+ readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
56
+ readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
57
+ hf_api.upload_file(
58
+ path_or_fileobj=readme_buffer,
59
+ path_in_repo="README.md",
60
+ repo_id=space_id,
61
+ repo_type="space",
62
+ )
63
+
64
+ # We can assume pandas, gradio, and huggingface-hub are already installed in a Gradio Space.
65
+ # Make sure necessary dependencies are installed by creating a requirements.txt.
66
+ requirements_content = """
67
+ pyarrow>=21.0
68
+ """
69
+ requirements_buffer = io.BytesIO(requirements_content.encode("utf-8"))
70
+ hf_api.upload_file(
71
+ path_or_fileobj=requirements_buffer,
72
+ path_in_repo="requirements.txt",
73
+ repo_id=space_id,
74
+ repo_type="space",
75
+ )
76
+
77
+ huggingface_hub.utils.disable_progress_bars()
78
+ hf_api.upload_folder(
79
+ repo_id=space_id,
80
+ repo_type="space",
81
+ folder_path=trackio_path,
82
+ ignore_patterns=["README.md"],
83
+ )
84
+
85
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_DIR", PERSISTENT_STORAGE_DIR)
86
+ if hf_token := huggingface_hub.utils.get_token():
87
+ huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
88
+ if dataset_id is not None:
89
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
90
+
91
+
92
+ def create_space_if_not_exists(
93
+ space_id: str,
94
+ dataset_id: str | None = None,
95
+ ) -> None:
96
+ """
97
+ Creates a new Hugging Face Space if it does not exist. If a dataset_id is provided, it will be added as a space variable.
98
+
99
+ Args:
100
+ space_id: The ID of the Space to create.
101
+ dataset_id: The ID of the Dataset to add to the Space.
102
+ """
103
+ if "/" not in space_id:
104
+ raise ValueError(
105
+ f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
106
+ )
107
+ if dataset_id is not None and "/" not in dataset_id:
108
+ raise ValueError(
109
+ f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
110
+ )
111
+ try:
112
+ huggingface_hub.repo_info(space_id, repo_type="space")
113
+ print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
114
+ if dataset_id is not None:
115
+ huggingface_hub.add_space_variable(
116
+ space_id, "TRACKIO_DATASET_ID", dataset_id
117
+ )
118
+ return
119
+ except RepositoryNotFoundError:
120
+ pass
121
+ except HTTPError as e:
122
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
123
+ print("Need 'write' access token to create a Spaces repo.")
124
+ huggingface_hub.login(add_to_git_credential=False)
125
+ huggingface_hub.add_space_variable(
126
+ space_id, "TRACKIO_DATASET_ID", dataset_id
127
+ )
128
+ else:
129
+ raise ValueError(f"Failed to create Space: {e}")
130
+
131
+ print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
132
+ deploy_as_space(space_id, dataset_id)
133
+
134
+
135
+ def wait_until_space_exists(
136
+ space_id: str,
137
+ ) -> None:
138
+ """
139
+ Blocks the current thread until the space exists.
140
+ May raise a TimeoutError if this takes quite a while.
141
+
142
+ Args:
143
+ space_id: The ID of the Space to wait for.
144
+ """
145
+ delay = 1
146
+ for _ in range(10):
147
+ try:
148
+ Client(space_id, verbose=False)
149
+ return
150
+ except (ReadTimeout, ValueError):
151
+ time.sleep(delay)
152
+ delay = min(delay * 2, 30)
153
+ raise TimeoutError("Waiting for space to exist took longer than expected")
154
+
155
+
156
+ def upload_db_to_space(project: str, space_id: str) -> None:
157
+ """
158
+ Uploads the database of a local Trackio project to a Hugging Face Space.
159
+
160
+ Args:
161
+ project: The name of the project to upload.
162
+ space_id: The ID of the Space to upload to.
163
+ """
164
+ db_path = SQLiteStorage.get_project_db_path(project)
165
+ client = Client(space_id, verbose=False)
166
+ client.predict(
167
+ api_name="/upload_db_to_space",
168
+ project=project,
169
+ uploaded_db=handle_file(db_path),
170
+ hf_token=huggingface_hub.utils.get_token(),
171
+ )
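
A sketch of pushing an existing local project to a Space using the helpers above, mirroring how `imports.py` calls them; the Space id and project name are illustrative:

```py
from trackio import deploy

space_id = "username/my-trackio-dashboard"  # hypothetical Space
deploy.create_space_if_not_exists(space_id, dataset_id=None)
deploy.wait_until_space_exists(space_id)
deploy.upload_db_to_space("my-project", space_id)
print(deploy.SPACE_URL.format(space_id=space_id))
```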
dummy_commit_scheduler.py ADDED
@@ -0,0 +1,12 @@
+ # A dummy object to fit the interface of huggingface_hub's CommitScheduler
+ class DummyCommitSchedulerLock:
+     def __enter__(self):
+         return None
+
+     def __exit__(self, exception_type, exception_value, exception_traceback):
+         pass
+
+
+ class DummyCommitScheduler:
+     def __init__(self):
+         self.lock = DummyCommitSchedulerLock()
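
The dummy scheduler only needs to expose the same `.lock` context manager that `SQLiteStorage` acquires before writing (see `sqlite_storage.py` below); a tiny sketch:

```py
from trackio.dummy_commit_scheduler import DummyCommitScheduler

scheduler = DummyCommitScheduler()
with scheduler.lock:  # no-op lock for purely local runs
    pass
```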
file_storage.py ADDED
@@ -0,0 +1,59 @@
+ from pathlib import Path
+
+ from PIL import Image as PILImage
+
+ try:  # absolute imports when installed
+     from trackio.utils import TRACKIO_DIR
+ except ImportError:  # relative imports for local execution on Spaces
+     from utils import TRACKIO_DIR
+
+
+ class FileStorage:
+     @staticmethod
+     def get_project_media_path(
+         project: str,
+         run: str | None = None,
+         step: int | None = None,
+         filename: str | None = None,
+     ) -> Path:
+         if filename is not None and step is None:
+             raise ValueError("filename requires step")
+         if step is not None and run is None:
+             raise ValueError("step requires run")
+
+         path = TRACKIO_DIR / "media" / project
+         if run:
+             path /= run
+         if step is not None:
+             path /= str(step)
+         if filename:
+             path /= filename
+         return path
+
+     @staticmethod
+     def init_project_media_path(
+         project: str, run: str | None = None, step: int | None = None
+     ) -> Path:
+         path = FileStorage.get_project_media_path(project, run, step)
+         path.mkdir(parents=True, exist_ok=True)
+         return path
+
+     @staticmethod
+     def save_image(
+         image: PILImage.Image,
+         project: str,
+         run: str,
+         step: int,
+         filename: str,
+         format: str = "PNG",
+     ) -> Path:
+         path = FileStorage.init_project_media_path(project, run, step) / filename
+         image.save(path, format=format)
+         return path
+
+     @staticmethod
+     def get_image(project: str, run: str, step: int, filename: str) -> PILImage.Image:
+         path = FileStorage.get_project_media_path(project, run, step, filename)
+         if not path.exists():
+             raise FileNotFoundError(f"Image file not found: {path}")
+         return PILImage.open(path).convert("RGBA")
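
A sketch of the media layout this class manages ({TRACKIO_DIR}/media/{project}/{run}/{step}/{filename}); the project, run, and filename are illustrative:

```py
from PIL import Image as PILImage
from trackio.file_storage import FileStorage

img = PILImage.new("RGB", (64, 64), color="red")  # illustrative image
saved_path = FileStorage.save_image(
    img, project="demo", run="run-1", step=0, filename="sample.png"
)
loaded = FileStorage.get_image("demo", "run-1", 0, "sample.png")
print(saved_path, loaded.size)
```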
imports.py ADDED
@@ -0,0 +1,245 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+
6
+ from trackio import deploy, utils
7
+ from trackio.sqlite_storage import SQLiteStorage
8
+
9
+
10
+ def import_csv(
11
+ csv_path: str | Path,
12
+ project: str,
13
+ name: str | None = None,
14
+ space_id: str | None = None,
15
+ dataset_id: str | None = None,
16
+ ) -> None:
17
+ """
18
+ Imports a CSV file into a Trackio project. The CSV file must contain a "step" column, may optionally
19
+ contain a "timestamp" column, and any other columns will be treated as metrics. It should also include
20
+ a header row with the column names.
21
+
22
+ TODO: call init() and return a Run object so that the user can continue to log metrics to it.
23
+
24
+ Args:
25
+ csv_path: The str or Path to the CSV file to import.
26
+ project: The name of the project to import the CSV file into. Must not be an existing project.
27
+ name: The name of the run to import the CSV file into. If not provided, a default name will be generated.
29
+ space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame" or "orgname/reponame", or just "reponame" in which case the Space will be created in the currently-logged-in Hugging Face user's namespace. If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
30
+ dataset_id: If provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Should be a complete Dataset name like "username/datasetname" or "orgname/datasetname", or just "datasetname" in which case the Dataset will be created in the currently-logged-in Hugging Face user's namespace. If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it. If not provided, the metrics will be logged to a local SQLite database, unless a `space_id` is provided, in which case a Dataset will be automatically created with the same name as the Space but with the "_dataset" suffix.
31
+ """
32
+ if SQLiteStorage.get_runs(project):
33
+ raise ValueError(
34
+ f"Project '{project}' already exists. Cannot import CSV into existing project."
35
+ )
36
+
37
+ csv_path = Path(csv_path)
38
+ if not csv_path.exists():
39
+ raise FileNotFoundError(f"CSV file not found: {csv_path}")
40
+
41
+ df = pd.read_csv(csv_path)
42
+ if df.empty:
43
+ raise ValueError("CSV file is empty")
44
+
45
+ column_mapping = utils.simplify_column_names(df.columns.tolist())
46
+ df = df.rename(columns=column_mapping)
47
+
48
+ step_column = None
49
+ for col in df.columns:
50
+ if col.lower() == "step":
51
+ step_column = col
52
+ break
53
+
54
+ if step_column is None:
55
+ raise ValueError("CSV file must contain a 'step' or 'Step' column")
56
+
57
+ if name is None:
58
+ name = csv_path.stem
59
+
60
+ metrics_list = []
61
+ steps = []
62
+ timestamps = []
63
+
64
+ numeric_columns = []
65
+ for column in df.columns:
66
+ if column == step_column:
67
+ continue
68
+ if column == "timestamp":
69
+ continue
70
+
71
+ try:
72
+ pd.to_numeric(df[column], errors="raise")
73
+ numeric_columns.append(column)
74
+ except (ValueError, TypeError):
75
+ continue
76
+
77
+ for _, row in df.iterrows():
78
+ metrics = {}
79
+ for column in numeric_columns:
80
+ value = row[column]
81
+ if bool(pd.notna(value)):
82
+ metrics[column] = float(value)
83
+
84
+ if metrics:
85
+ metrics_list.append(metrics)
86
+ steps.append(int(row[step_column]))
87
+
88
+ if "timestamp" in df.columns and bool(pd.notna(row["timestamp"])):
89
+ timestamps.append(str(row["timestamp"]))
90
+ else:
91
+ timestamps.append("")
92
+
93
+ if metrics_list:
94
+ SQLiteStorage.bulk_log(
95
+ project=project,
96
+ run=name,
97
+ metrics_list=metrics_list,
98
+ steps=steps,
99
+ timestamps=timestamps,
100
+ )
101
+
102
+ print(
103
+ f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
104
+ )
105
+ print(f"* Metrics found: {', '.join(metrics_list[0].keys())}")
106
+
107
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
108
+ if dataset_id is not None:
109
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
110
+ print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
111
+
112
+ if space_id is None:
113
+ utils.print_dashboard_instructions(project)
114
+ else:
115
+ deploy.create_space_if_not_exists(space_id, dataset_id)
116
+ deploy.wait_until_space_exists(space_id)
117
+ deploy.upload_db_to_space(project, space_id)
118
+ print(
119
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
120
+ )
121
+
122
+
123
+ def import_tf_events(
124
+ log_dir: str | Path,
125
+ project: str,
126
+ name: str | None = None,
127
+ space_id: str | None = None,
128
+ dataset_id: str | None = None,
129
+ ) -> None:
130
+ """
131
+ Imports TensorFlow Events files from a directory into a Trackio project.
132
+ Each subdirectory in the log directory will be imported as a separate run.
133
+
134
+ Args:
135
+ log_dir: The str or Path to the directory containing TensorFlow Events files.
136
+ project: The name of the project to import the TensorFlow Events files into. Must not be an existing project.
137
+ name: The name prefix for runs (if not provided, will use directory names). Each subdirectory will create a separate run.
138
+ space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame" or "orgname/reponame", or just "reponame" in which case the Space will be created in the currently-logged-in Hugging Face user's namespace. If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
139
+ dataset_id: If provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Should be a complete Dataset name like "username/datasetname" or "orgname/datasetname", or just "datasetname" in which case the Dataset will be created in the currently-logged-in Hugging Face user's namespace. If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it. If not provided, the metrics will be logged to a local SQLite database, unless a `space_id` is provided, in which case a Dataset will be automatically created with the same name as the Space but with the "_dataset" suffix.
140
+ """
141
+ try:
142
+ from tbparse import SummaryReader
143
+ except ImportError:
144
+ raise ImportError(
145
+ "The `tbparse` package is not installed but is required for `import_tf_events`. Please install trackio with the `tensorboard` extra: `pip install trackio[tensorboard]`."
146
+ )
147
+
148
+ if SQLiteStorage.get_runs(project):
149
+ raise ValueError(
150
+ f"Project '{project}' already exists. Cannot import TF events into existing project."
151
+ )
152
+
153
+ path = Path(log_dir)
154
+ if not path.exists():
155
+ raise FileNotFoundError(f"TF events directory not found: {path}")
156
+
157
+ # Use tbparse to read all tfevents files in the directory structure
158
+ reader = SummaryReader(str(path), extra_columns={"dir_name"})
159
+ df = reader.scalars
160
+
161
+ if df.empty:
162
+ raise ValueError(f"No TensorFlow events data found in {path}")
163
+
164
+ total_imported = 0
165
+ imported_runs = []
166
+
167
+ # Group by dir_name to create separate runs
168
+ for dir_name, group_df in df.groupby("dir_name"):
169
+ try:
170
+ # Determine run name based on directory name
171
+ if dir_name == "":
172
+ run_name = "main" # For files in the root directory
173
+ else:
174
+ run_name = dir_name # Use directory name
175
+
176
+ if name:
177
+ run_name = f"{name}_{run_name}"
178
+
179
+ if group_df.empty:
180
+ print(f"* Skipping directory {dir_name}: no scalar data found")
181
+ continue
182
+
183
+ metrics_list = []
184
+ steps = []
185
+ timestamps = []
186
+
187
+ for _, row in group_df.iterrows():
188
+ # Convert row values to appropriate types
189
+ tag = str(row["tag"])
190
+ value = float(row["value"])
191
+ step = int(row["step"])
192
+
193
+ metrics = {tag: value}
194
+ metrics_list.append(metrics)
195
+ steps.append(step)
196
+
197
+ # Use wall_time if present, else fallback
198
+ if "wall_time" in group_df.columns and not bool(
199
+ pd.isna(row["wall_time"])
200
+ ):
201
+ timestamps.append(str(row["wall_time"]))
202
+ else:
203
+ timestamps.append("")
204
+
205
+ if metrics_list:
206
+ SQLiteStorage.bulk_log(
207
+ project=project,
208
+ run=str(run_name),
209
+ metrics_list=metrics_list,
210
+ steps=steps,
211
+ timestamps=timestamps,
212
+ )
213
+
214
+ total_imported += len(metrics_list)
215
+ imported_runs.append(run_name)
216
+
217
+ print(
218
+ f"* Imported {len(metrics_list)} scalar events from directory '{dir_name}' as run '{run_name}'"
219
+ )
220
+ print(f"* Metrics in this run: {', '.join(set(group_df['tag']))}")
221
+
222
+ except Exception as e:
223
+ print(f"* Error processing directory {dir_name}: {e}")
224
+ continue
225
+
226
+ if not imported_runs:
227
+ raise ValueError("No valid TensorFlow events data could be imported")
228
+
229
+ print(f"* Total imported events: {total_imported}")
230
+ print(f"* Created runs: {', '.join(imported_runs)}")
231
+
232
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
233
+ if dataset_id is not None:
234
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
235
+ print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
236
+
237
+ if space_id is None:
238
+ utils.print_dashboard_instructions(project)
239
+ else:
240
+ deploy.create_space_if_not_exists(space_id, dataset_id)
241
+ deploy.wait_until_space_exists(space_id)
242
+ deploy.upload_db_to_space(project, space_id)
243
+ print(
244
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
245
+ )
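
A sketch of `import_csv` with the expected CSV shape (a header row, a required "step" column, optional "timestamp", and metric columns); the file and project names are illustrative:

```py
import pandas as pd
import trackio

pd.DataFrame(
    {"step": [0, 1, 2], "loss": [1.0, 0.7, 0.5], "accuracy": [0.4, 0.6, 0.8]}
).to_csv("old_run.csv", index=False)

trackio.import_csv("old_run.csv", project="imported-project")
```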
media.py ADDED
@@ -0,0 +1,100 @@
+ import uuid
+ from pathlib import Path
+
+ import numpy as np
+ from PIL import Image as PILImage
+
+ try:  # absolute imports when installed
+     from trackio.file_storage import FileStorage
+     from trackio.utils import TRACKIO_DIR
+ except ImportError:  # relative imports for local execution on Spaces
+     from file_storage import FileStorage
+     from utils import TRACKIO_DIR
+
+
+ class TrackioImage:
+     """
+     Creates an image that can be logged with trackio.
+
+     Demo: fake-training-images
+     """
+
+     TYPE = "trackio.image"
+
+     def __init__(
+         self, value: str | np.ndarray | PILImage.Image, caption: str | None = None
+     ):
+         """
+         Parameters:
+             value: A string path to an image, a numpy array, or a PIL Image.
+             caption: A string caption for the image.
+         """
+         self.caption = caption
+         self._pil = TrackioImage._as_pil(value)
+         self._file_path: Path | None = None
+         self._file_format: str | None = None
+
+     @staticmethod
+     def _as_pil(value: str | np.ndarray | PILImage.Image) -> PILImage.Image:
+         try:
+             if isinstance(value, str):
+                 return PILImage.open(value).convert("RGBA")
+             elif isinstance(value, np.ndarray):
+                 arr = np.asarray(value).astype("uint8")
+                 return PILImage.fromarray(arr).convert("RGBA")
+             elif isinstance(value, PILImage.Image):
+                 return value.convert("RGBA")
+         except Exception as e:
+             raise ValueError(f"Failed to process image data: {value}") from e
+
+     def _save(self, project: str, run: str, step: int = 0, format: str = "PNG") -> str:
+         if not self._file_path:
+             # Save image as {TRACKIO_DIR}/media/{project}/{run}/{step}/{uuid}.{ext}
+             filename = f"{uuid.uuid4()}.{format.lower()}"
+             path = FileStorage.save_image(
+                 self._pil, project, run, step, filename, format=format
+             )
+             self._file_path = path.relative_to(TRACKIO_DIR)
+             self._file_format = format
+         return str(self._file_path)
+
+     def _get_relative_file_path(self) -> Path | None:
+         return self._file_path
+
+     def _get_absolute_file_path(self) -> Path | None:
+         return TRACKIO_DIR / self._file_path
+
+     def _to_dict(self) -> dict:
+         if not self._file_path:
+             raise ValueError("Image must be saved to file before serialization")
+         return {
+             "_type": self.TYPE,
+             "file_path": str(self._get_relative_file_path()),
+             "file_format": self._file_format,
+             "caption": self.caption,
+         }
+
+     @classmethod
+     def _from_dict(cls, obj: dict) -> "TrackioImage":
+         if not isinstance(obj, dict):
+             raise TypeError(f"Expected dict, got {type(obj).__name__}")
+         if obj.get("_type") != cls.TYPE:
+             raise ValueError(f"Wrong _type: {obj.get('_type')!r}")
+
+         file_path = obj.get("file_path")
+         if not isinstance(file_path, str):
+             raise TypeError(
+                 f"'file_path' must be string, got {type(file_path).__name__}"
+             )
+
+         absolute_path = TRACKIO_DIR / file_path
+         try:
+             if not absolute_path.is_file():
+                 raise ValueError(f"Image file not found: {file_path}")
+             pil = PILImage.open(absolute_path).convert("RGBA")
+             instance = cls(pil, caption=obj.get("caption"))
+             instance._file_path = Path(file_path)
+             instance._file_format = obj.get("file_format")
+             return instance
+         except Exception as e:
+             raise ValueError(f"Failed to load image from file: {absolute_path}") from e
py.typed ADDED
File without changes
run.py ADDED
@@ -0,0 +1,140 @@
1
+ import threading
2
+ import time
3
+
4
+ import huggingface_hub
5
+ from gradio_client import Client, handle_file
6
+
7
+ from trackio.media import TrackioImage
8
+ from trackio.sqlite_storage import SQLiteStorage
9
+ from trackio.typehints import LogEntry, UploadEntry
10
+ from trackio.utils import RESERVED_KEYS, fibo, generate_readable_name
11
+
12
+ BATCH_SEND_INTERVAL = 0.5
13
+
14
+
15
+ class Run:
16
+ def __init__(
17
+ self,
18
+ url: str,
19
+ project: str,
20
+ client: Client | None,
21
+ name: str | None = None,
22
+ config: dict | None = None,
23
+ space_id: str | None = None,
24
+ ):
25
+ self.url = url
26
+ self.project = project
27
+ self._client_lock = threading.Lock()
28
+ self._client_thread = None
29
+ self._client = client
30
+ self._space_id = space_id
31
+ self.name = name or generate_readable_name(
32
+ SQLiteStorage.get_runs(project), space_id
33
+ )
34
+ self.config = config or {}
35
+ self._queued_logs: list[LogEntry] = []
36
+ self._queued_uploads: list[UploadEntry] = []
37
+ self._stop_flag = threading.Event()
38
+
39
+ self._client_thread = threading.Thread(target=self._init_client_background)
40
+ self._client_thread.daemon = True
41
+ self._client_thread.start()
42
+
43
+ def _batch_sender(self):
44
+ """Send batched logs every BATCH_SEND_INTERVAL."""
45
+ while not self._stop_flag.is_set() or len(self._queued_logs) > 0:
46
+ # If the stop flag has been set, then just quickly send all
47
+ # the logs and exit.
48
+ if not self._stop_flag.is_set():
49
+ time.sleep(BATCH_SEND_INTERVAL)
50
+
51
+ with self._client_lock:
52
+ if self._queued_logs and self._client is not None:
53
+ logs_to_send = self._queued_logs.copy()
54
+ self._queued_logs.clear()
55
+ self._client.predict(
56
+ api_name="/bulk_log",
57
+ logs=logs_to_send,
58
+ hf_token=huggingface_hub.utils.get_token(),
59
+ )
60
+ if self._queued_uploads and self._client is not None:
61
+ uploads_to_send = self._queued_uploads.copy()
62
+ self._queued_uploads.clear()
63
+ self._client.predict(
64
+ api_name="/bulk_upload_media",
65
+ uploads=uploads_to_send,
66
+ hf_token=huggingface_hub.utils.get_token(),
67
+ )
68
+
69
+ def _init_client_background(self):
70
+ if self._client is None:
71
+ fib = fibo()
72
+ for sleep_coefficient in fib:
73
+ try:
74
+ client = Client(self.url, verbose=False)
75
+
76
+ with self._client_lock:
77
+ self._client = client
78
+ break
79
+ except Exception:
80
+ pass
81
+ if sleep_coefficient is not None:
82
+ time.sleep(0.1 * sleep_coefficient)
83
+
84
+ self._batch_sender()
85
+
86
+ def _process_media(self, metrics, step: int | None) -> dict:
87
+ """
88
+ Serialize media in metrics and upload to space if needed.
89
+ """
90
+ serializable_metrics = {}
91
+ if not step:
92
+ step = 0
93
+ for key, value in metrics.items():
94
+ if isinstance(value, TrackioImage):
95
+ value._save(self.project, self.name, step)
96
+ serializable_metrics[key] = value._to_dict()
97
+ if self._space_id:
98
+ # Upload local media when deploying to space
99
+ upload_entry: UploadEntry = {
100
+ "project": self.project,
101
+ "run": self.name,
102
+ "step": step,
103
+ "uploaded_file": handle_file(value._get_absolute_file_path()),
104
+ }
105
+ with self._client_lock:
106
+ self._queued_uploads.append(upload_entry)
107
+ else:
108
+ serializable_metrics[key] = value
109
+ return serializable_metrics
110
+
111
+ def log(self, metrics: dict, step: int | None = None):
112
+ for k in metrics.keys():
113
+ if k in RESERVED_KEYS or k.startswith("__"):
114
+ raise ValueError(
115
+ f"Please do not use this reserved key as a metric: {k}"
116
+ )
117
+
118
+ metrics = self._process_media(metrics, step)
119
+ log_entry: LogEntry = {
120
+ "project": self.project,
121
+ "run": self.name,
122
+ "metrics": metrics,
123
+ "step": step,
124
+ }
125
+
126
+ with self._client_lock:
127
+ self._queued_logs.append(log_entry)
128
+
129
+ def finish(self):
130
+ """Cleanup when run is finished."""
131
+ self._stop_flag.set()
132
+
133
+ # Wait for the batch sender to finish before joining the client thread.
134
+ time.sleep(2 * BATCH_SEND_INTERVAL)
135
+
136
+ if self._client_thread is not None:
137
+ print(
138
+ f"* Run finished. Uploading logs to Trackio Space: {self.url} (please wait...)"
139
+ )
140
+ self._client_thread.join()
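
`Run._init_client_background` retries the client connection with Fibonacci backoff via `utils.fibo` (not shown in this commit); a plausible minimal equivalent, for illustration only:

```py
import time

def fibo():
    # illustrative stand-in for trackio.utils.fibo
    a, b = 1, 1
    while True:
        yield a
        a, b = b, a + b

def connect_with_backoff(connect, sleep_unit=0.1):
    # mirrors the loop in Run._init_client_background: keep retrying until the server answers
    for coefficient in fibo():
        try:
            return connect()
        except Exception:
            time.sleep(sleep_unit * coefficient)
```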
sqlite_storage.py ADDED
@@ -0,0 +1,384 @@
1
+ import json
2
+ import os
3
+ import sqlite3
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from threading import Lock
7
+
8
+ import huggingface_hub as hf
9
+ import pandas as pd
10
+
11
+ try: # absolute imports when installed
12
+ from trackio.commit_scheduler import CommitScheduler
13
+ from trackio.dummy_commit_scheduler import DummyCommitScheduler
14
+ from trackio.utils import TRACKIO_DIR
15
+ except Exception: # relative imports for local execution on Spaces
16
+ from commit_scheduler import CommitScheduler
17
+ from dummy_commit_scheduler import DummyCommitScheduler
18
+ from utils import TRACKIO_DIR
19
+
20
+
21
+ class SQLiteStorage:
22
+ _dataset_import_attempted = False
23
+ _current_scheduler: CommitScheduler | DummyCommitScheduler | None = None
24
+ _scheduler_lock = Lock()
25
+
26
+ @staticmethod
27
+ def _get_connection(db_path: Path) -> sqlite3.Connection:
28
+ conn = sqlite3.connect(str(db_path))
29
+ conn.row_factory = sqlite3.Row
30
+ return conn
31
+
32
+ @staticmethod
33
+ def get_project_db_filename(project: str) -> str:
34
+ """Get the database filename for a specific project."""
35
+ safe_project_name = "".join(
36
+ c for c in project if c.isalnum() or c in ("-", "_")
37
+ ).rstrip()
38
+ if not safe_project_name:
39
+ safe_project_name = "default"
40
+ return f"{safe_project_name}.db"
41
+
42
+ @staticmethod
43
+ def get_project_db_path(project: str) -> Path:
44
+ """Get the database path for a specific project."""
45
+ filename = SQLiteStorage.get_project_db_filename(project)
46
+ return TRACKIO_DIR / filename
47
+
48
+ @staticmethod
49
+ def init_db(project: str) -> Path:
50
+ """
51
+ Initialize the SQLite database with required tables.
52
+ If there is a dataset ID provided, copies from that dataset instead.
53
+ Returns the database path.
54
+ """
55
+ db_path = SQLiteStorage.get_project_db_path(project)
56
+ db_path.parent.mkdir(parents=True, exist_ok=True)
57
+ with SQLiteStorage.get_scheduler().lock:
58
+ with sqlite3.connect(db_path) as conn:
59
+ cursor = conn.cursor()
60
+ cursor.execute("""
61
+ CREATE TABLE IF NOT EXISTS metrics (
62
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
63
+ timestamp TEXT NOT NULL,
64
+ run_name TEXT NOT NULL,
65
+ step INTEGER NOT NULL,
66
+ metrics TEXT NOT NULL
67
+ )
68
+ """)
69
+ cursor.execute(
70
+ """
71
+ CREATE INDEX IF NOT EXISTS idx_metrics_run_step
72
+ ON metrics(run_name, step)
73
+ """
74
+ )
75
+ conn.commit()
76
+ return db_path
77
+
78
+ @staticmethod
79
+ def export_to_parquet():
80
+ """
81
+ Exports all projects' DB files as Parquet under the same path but with extension ".parquet".
82
+ """
83
+ # don't attempt to export (potentially wrong/blank) data before importing for the first time
84
+ if not SQLiteStorage._dataset_import_attempted:
85
+ return
86
+ all_paths = os.listdir(TRACKIO_DIR)
87
+ db_paths = [f for f in all_paths if f.endswith(".db")]
88
+ for db_path in db_paths:
89
+ db_path = TRACKIO_DIR / db_path
90
+ parquet_path = db_path.with_suffix(".parquet")
91
+ if (not parquet_path.exists()) or (
92
+ db_path.stat().st_mtime > parquet_path.stat().st_mtime
93
+ ):
94
+ with sqlite3.connect(db_path) as conn:
95
+ df = pd.read_sql("SELECT * from metrics", conn)
96
+ # break out the single JSON metrics column into individual columns
97
+ metrics = df["metrics"].copy()
98
+ metrics = pd.DataFrame(
99
+ metrics.apply(json.loads).values.tolist(), index=df.index
100
+ )
101
+ del df["metrics"]
102
+ for col in metrics.columns:
103
+ df[col] = metrics[col]
104
+ df.to_parquet(parquet_path)
105
+
106
+ @staticmethod
107
+ def import_from_parquet():
108
+ """
109
+ Imports data into all DB files that have matching files under the same path but with extension ".parquet".
110
+ """
111
+ all_paths = os.listdir(TRACKIO_DIR)
112
+ parquet_paths = [f for f in all_paths if f.endswith(".parquet")]
113
+ for parquet_path in parquet_paths:
114
+ parquet_path = TRACKIO_DIR / parquet_path
115
+ db_path = parquet_path.with_suffix(".db")
116
+ df = pd.read_parquet(parquet_path)
117
+ with sqlite3.connect(db_path) as conn:
118
+ # fix up df to have a single JSON metrics column
119
+ if "metrics" not in df.columns:
120
+ # separate other columns from metrics
121
+ metrics = df.copy()
122
+ other_cols = ["id", "timestamp", "run_name", "step"]
123
+ df = df[other_cols]
124
+ for col in other_cols:
125
+ del metrics[col]
126
+ # combine them all into a single metrics col
127
+ metrics = json.loads(metrics.to_json(orient="records"))
128
+ df["metrics"] = [json.dumps(row) for row in metrics]
129
+ df.to_sql("metrics", conn, if_exists="replace", index=False)
130
+
131
+ @staticmethod
132
+ def get_scheduler():
133
+ """
134
+ Get the scheduler for the database based on the environment variables.
135
+ This applies both to local runs and to Spaces.
136
+ """
137
+ with SQLiteStorage._scheduler_lock:
138
+ if SQLiteStorage._current_scheduler is not None:
139
+ return SQLiteStorage._current_scheduler
140
+ hf_token = os.environ.get("HF_TOKEN")
141
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
142
+ space_repo_name = os.environ.get("SPACE_REPO_NAME")
143
+ if dataset_id is None or space_repo_name is None:
144
+ scheduler = DummyCommitScheduler()
145
+ else:
146
+ scheduler = CommitScheduler(
147
+ repo_id=dataset_id,
148
+ repo_type="dataset",
149
+ folder_path=TRACKIO_DIR,
150
+ private=True,
151
+ allow_patterns=["*.parquet", "media/**/*"],
152
+ squash_history=True,
153
+ token=hf_token,
154
+ on_before_commit=SQLiteStorage.export_to_parquet,
155
+ )
156
+ SQLiteStorage._current_scheduler = scheduler
157
+ return scheduler
158
+
159
+ @staticmethod
160
+ def log(project: str, run: str, metrics: dict, step: int | None = None):
161
+ """
162
+ Safely log metrics to the database. Before logging, this method will ensure the database exists
163
+ and is set up with the correct tables. It also uses the scheduler to lock the database so
164
+ that there is no race condition when logging / syncing to the Hugging Face Dataset.
165
+ """
166
+ db_path = SQLiteStorage.init_db(project)
167
+
168
+ with SQLiteStorage.get_scheduler().lock:
169
+ with SQLiteStorage._get_connection(db_path) as conn:
170
+ cursor = conn.cursor()
171
+
172
+ cursor.execute(
173
+ """
174
+ SELECT MAX(step)
175
+ FROM metrics
176
+ WHERE run_name = ?
177
+ """,
178
+ (run,),
179
+ )
180
+ last_step = cursor.fetchone()[0]
181
+ if step is None:
182
+ current_step = 0 if last_step is None else last_step + 1
183
+ else:
184
+ current_step = step
185
+
186
+ current_timestamp = datetime.now().isoformat()
187
+
188
+ cursor.execute(
189
+ """
190
+ INSERT INTO metrics
191
+ (timestamp, run_name, step, metrics)
192
+ VALUES (?, ?, ?, ?)
193
+ """,
194
+ (
195
+ current_timestamp,
196
+ run,
197
+ current_step,
198
+ json.dumps(metrics),
199
+ ),
200
+ )
201
+ conn.commit()
202
+
203
+ @staticmethod
204
+ def bulk_log(
205
+ project: str,
206
+ run: str,
207
+ metrics_list: list[dict],
208
+ steps: list[int] | None = None,
209
+ timestamps: list[str] | None = None,
210
+ ):
211
+ """Bulk log metrics to the database with specified steps and timestamps."""
212
+ if not metrics_list:
213
+ return
214
+
215
+ if timestamps is None:
216
+ timestamps = [datetime.now().isoformat()] * len(metrics_list)
217
+
218
+ db_path = SQLiteStorage.init_db(project)
219
+ with SQLiteStorage.get_scheduler().lock:
220
+ with SQLiteStorage._get_connection(db_path) as conn:
221
+ cursor = conn.cursor()
222
+
223
+ if steps is None:
224
+ steps = list(range(len(metrics_list)))
225
+ elif any(s is None for s in steps):
226
+ cursor.execute(
227
+ "SELECT MAX(step) FROM metrics WHERE run_name = ?", (run,)
228
+ )
229
+ last_step = cursor.fetchone()[0]
230
+ current_step = 0 if last_step is None else last_step + 1
231
+
232
+ processed_steps = []
233
+ for step in steps:
234
+ if step is None:
235
+ processed_steps.append(current_step)
236
+ current_step += 1
237
+ else:
238
+ processed_steps.append(step)
239
+ steps = processed_steps
240
+
241
+ if len(metrics_list) != len(steps) or len(metrics_list) != len(
242
+ timestamps
243
+ ):
244
+ raise ValueError(
245
+ "metrics_list, steps, and timestamps must have the same length"
246
+ )
247
+
248
+ data = []
249
+ for i, metrics in enumerate(metrics_list):
250
+ data.append(
251
+ (
252
+ timestamps[i],
253
+ run,
254
+ steps[i],
255
+ json.dumps(metrics),
256
+ )
257
+ )
258
+
259
+ cursor.executemany(
260
+ """
261
+ INSERT INTO metrics
262
+ (timestamp, run_name, step, metrics)
263
+ VALUES (?, ?, ?, ?)
264
+ """,
265
+ data,
266
+ )
267
+ conn.commit()
268
+
269
+ @staticmethod
270
+ def get_logs(project: str, run: str) -> list[dict]:
271
+ """Retrieve logs for a specific run. Logs include the step count (int) and the timestamp (datetime object)."""
272
+ db_path = SQLiteStorage.get_project_db_path(project)
273
+ if not db_path.exists():
274
+ return []
275
+
276
+ with SQLiteStorage._get_connection(db_path) as conn:
277
+ cursor = conn.cursor()
278
+ cursor.execute(
279
+ """
280
+ SELECT timestamp, step, metrics
281
+ FROM metrics
282
+ WHERE run_name = ?
283
+ ORDER BY timestamp
284
+ """,
285
+ (run,),
286
+ )
287
+
288
+ rows = cursor.fetchall()
289
+ results = []
290
+ for row in rows:
291
+ metrics = json.loads(row["metrics"])
292
+ metrics["timestamp"] = row["timestamp"]
293
+ metrics["step"] = row["step"]
294
+ results.append(metrics)
295
+ return results
296
+
297
+ @staticmethod
298
+ def load_from_dataset():
299
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
300
+ space_repo_name = os.environ.get("SPACE_REPO_NAME")
301
+ if dataset_id is not None and space_repo_name is not None:
302
+ hfapi = hf.HfApi()
303
+ updated = False
304
+ if not TRACKIO_DIR.exists():
305
+ TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
306
+ with SQLiteStorage.get_scheduler().lock:
307
+ try:
308
+ files = hfapi.list_repo_files(dataset_id, repo_type="dataset")
309
+ for file in files:
310
+ # Download parquet and media assets
311
+ if not (file.endswith(".parquet") or file.startswith("media/")):
312
+ continue
313
+ hf.hf_hub_download(
314
+ dataset_id, file, repo_type="dataset", local_dir=TRACKIO_DIR
315
+ )
316
+ updated = True
317
+ except hf.errors.EntryNotFoundError:
318
+ pass
319
+ except hf.errors.RepositoryNotFoundError:
320
+ pass
321
+ if updated:
322
+ SQLiteStorage.import_from_parquet()
323
+ SQLiteStorage._dataset_import_attempted = True
324
+
325
+ @staticmethod
326
+ def get_projects() -> list[str]:
327
+ """
328
+ Get list of all projects by scanning the database files in the trackio directory.
329
+ """
330
+ if not SQLiteStorage._dataset_import_attempted:
331
+ SQLiteStorage.load_from_dataset()
332
+
333
+ projects: set[str] = set()
334
+ if not TRACKIO_DIR.exists():
335
+ return []
336
+
337
+ for db_file in TRACKIO_DIR.glob("*.db"):
338
+ project_name = db_file.stem
339
+ projects.add(project_name)
340
+ return sorted(projects)
341
+
342
+ @staticmethod
343
+ def get_runs(project: str) -> list[str]:
344
+ """Get list of all runs for a project."""
345
+ db_path = SQLiteStorage.get_project_db_path(project)
346
+ if not db_path.exists():
347
+ return []
348
+
349
+ with SQLiteStorage._get_connection(db_path) as conn:
350
+ cursor = conn.cursor()
351
+ cursor.execute(
352
+ "SELECT DISTINCT run_name FROM metrics",
353
+ )
354
+ return [row[0] for row in cursor.fetchall()]
355
+
356
+ @staticmethod
357
+ def get_max_steps_for_runs(project: str, runs: list[str]) -> dict[str, int]:
358
+ """Efficiently get the maximum step for multiple runs in a single query."""
359
+ db_path = SQLiteStorage.get_project_db_path(project)
360
+ if not db_path.exists():
361
+ return {run: 0 for run in runs}
362
+
363
+ with SQLiteStorage._get_connection(db_path) as conn:
364
+ cursor = conn.cursor()
365
+ placeholders = ",".join("?" * len(runs))
366
+ cursor.execute(
367
+ f"""
368
+ SELECT run_name, MAX(step) as max_step
369
+ FROM metrics
370
+ WHERE run_name IN ({placeholders})
371
+ GROUP BY run_name
372
+ """,
373
+ runs,
374
+ )
375
+
376
+ results = {run: 0 for run in runs} # Default to 0 for runs with no data
377
+ for row in cursor.fetchall():
378
+ results[row["run_name"]] = row["max_step"]
379
+
380
+ return results
381
+
382
+ def finish(self):
383
+ """Cleanup when run is finished."""
384
+ pass
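A small hedged sketch of exercising SQLiteStorage directly for local testing (with neither TRACKIO_DATASET_ID nor SPACE_REPO_NAME set, get_scheduler() falls back to the DummyCommitScheduler, so nothing is pushed to the Hub; names and values below are made up):

from trackio.sqlite_storage import SQLiteStorage

# log() lazily creates <HF_HOME>/trackio/demo-project.db with the metrics table.
SQLiteStorage.log(project="demo-project", run="run-1", metrics={"loss": 0.42})
SQLiteStorage.log(project="demo-project", run="run-1", metrics={"loss": 0.40})

print(SQLiteStorage.get_projects())            # includes 'demo-project'
print(SQLiteStorage.get_runs("demo-project"))  # ['run-1']
logs = SQLiteStorage.get_logs("demo-project", "run-1")
print(logs[0]["loss"], logs[0]["step"])        # 0.42 0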
typehints.py ADDED
@@ -0,0 +1,17 @@
1
+ from typing import Any, TypedDict
2
+
3
+ from gradio import FileData
4
+
5
+
6
+ class LogEntry(TypedDict):
7
+ project: str
8
+ run: str
9
+ metrics: dict[str, Any]
10
+ step: int | None
11
+
12
+
13
+ class UploadEntry(TypedDict):
14
+ project: str
15
+ run: str
16
+ step: int | None
17
+ uploaded_file: FileData
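At runtime these TypedDicts are ordinary dicts; a quick illustrative example of the LogEntry shape the client queues and bulk_log consumes (field values are made up):

from trackio.typehints import LogEntry

entry: LogEntry = {
    "project": "demo-project",
    "run": "run-1",
    "metrics": {"train/loss": 0.42, "train/acc": 0.91},
    "step": 7,
}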
ui.py ADDED
@@ -0,0 +1,711 @@
1
+ import os
2
+ import re
3
+ import shutil
4
+ from typing import Any
5
+
6
+ import gradio as gr
7
+ import huggingface_hub as hf
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ HfApi = hf.HfApi()
12
+
13
+ try:
14
+ import trackio.utils as utils
15
+ from trackio.file_storage import FileStorage
16
+ from trackio.media import TrackioImage
17
+ from trackio.sqlite_storage import SQLiteStorage
18
+ from trackio.typehints import LogEntry, UploadEntry
19
+ except: # noqa: E722
20
+ import utils
21
+ from file_storage import FileStorage
22
+ from media import TrackioImage
23
+ from sqlite_storage import SQLiteStorage
24
+ from typehints import LogEntry, UploadEntry
25
+
26
+
27
+ def get_project_info() -> str | None:
28
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
29
+ space_id = os.environ.get("SPACE_ID")
30
+ persistent_storage_enabled = os.environ.get(
31
+ "PERSISTANT_STORAGE_ENABLED"
32
+ ) # Space env name has a typo
33
+ if persistent_storage_enabled:
34
+ return "&#10024; Persistent Storage is enabled, logs are stored directly in this Space."
35
+ if dataset_id:
36
+ sync_status = utils.get_sync_status(SQLiteStorage.get_scheduler())
37
+ upgrade_message = f"New changes are synced every 5 min <span class='info-container'><input type='checkbox' class='info-checkbox' id='upgrade-info'><label for='upgrade-info' class='info-icon'>&#9432;</label><span class='info-expandable'> To avoid losing data between syncs, <a href='https://huggingface.co/spaces/{space_id}/settings' class='accent-link'>click here</a> to open this Space's settings and add Persistent Storage.</span></span>"
38
+ if sync_status is not None:
39
+ info = f"&#x21bb; Backed up {sync_status} min ago to <a href='https://huggingface.co/datasets/{dataset_id}' target='_blank' class='accent-link'>{dataset_id}</a> | {upgrade_message}"
40
+ else:
41
+ info = f"&#x21bb; Not backed up yet to <a href='https://huggingface.co/datasets/{dataset_id}' target='_blank' class='accent-link'>{dataset_id}</a> | {upgrade_message}"
42
+ return info
43
+ return None
44
+
45
+
46
+ def get_projects(request: gr.Request):
47
+ projects = SQLiteStorage.get_projects()
48
+ if project := request.query_params.get("project"):
49
+ interactive = False
50
+ else:
51
+ interactive = True
52
+ project = projects[0] if projects else None
53
+
54
+ return gr.Dropdown(
55
+ label="Project",
56
+ choices=projects,
57
+ value=project,
58
+ allow_custom_value=True,
59
+ interactive=interactive,
60
+ info=get_project_info(),
61
+ )
62
+
63
+
64
+ def get_runs(project) -> list[str]:
65
+ if not project:
66
+ return []
67
+ return SQLiteStorage.get_runs(project)
68
+
69
+
70
+ def get_available_metrics(project: str, runs: list[str]) -> list[str]:
71
+ """Get all available metrics across all runs for x-axis selection."""
72
+ if not project or not runs:
73
+ return ["step", "time"]
74
+
75
+ all_metrics = set()
76
+ for run in runs:
77
+ metrics = SQLiteStorage.get_logs(project, run)
78
+ if metrics:
79
+ df = pd.DataFrame(metrics)
80
+ numeric_cols = df.select_dtypes(include="number").columns
81
+ numeric_cols = [c for c in numeric_cols if c not in utils.RESERVED_KEYS]
82
+ all_metrics.update(numeric_cols)
83
+
84
+ all_metrics.add("step")
85
+ all_metrics.add("time")
86
+
87
+ sorted_metrics = utils.sort_metrics_by_prefix(list(all_metrics))
88
+
89
+ result = ["step", "time"]
90
+ for metric in sorted_metrics:
91
+ if metric not in result:
92
+ result.append(metric)
93
+
94
+ return result
95
+
96
+
97
+ def extract_images(logs: list[dict]) -> dict[str, list[TrackioImage]]:
98
+ image_data = {}
99
+ logs = sorted(logs, key=lambda x: x.get("step", 0))
100
+ for log in logs:
101
+ for key, value in log.items():
102
+ if isinstance(value, dict) and value.get("_type") == TrackioImage.TYPE:
103
+ if key not in image_data:
104
+ image_data[key] = []
105
+ try:
106
+ image_data[key].append(TrackioImage._from_dict(value))
107
+ except Exception as e:
108
+ print(f"Image not currently available: {key}: {e}")
109
+ return image_data
110
+
111
+
112
+ def load_run_data(
113
+ project: str | None,
114
+ run: str | None,
115
+ smoothing: bool,
116
+ x_axis: str,
117
+ log_scale: bool = False,
118
+ ) -> tuple[pd.DataFrame, dict]:
119
+ if not project or not run:
120
+ return None, None
121
+
122
+ logs = SQLiteStorage.get_logs(project, run)
123
+ if not logs:
124
+ return None, None
125
+
126
+ images = extract_images(logs)
127
+ df = pd.DataFrame(logs)
128
+
129
+ if "step" not in df.columns:
130
+ df["step"] = range(len(df))
131
+
132
+ if x_axis == "time" and "timestamp" in df.columns:
133
+ df["timestamp"] = pd.to_datetime(df["timestamp"])
134
+ first_timestamp = df["timestamp"].min()
135
+ df["time"] = (df["timestamp"] - first_timestamp).dt.total_seconds()
136
+ x_column = "time"
137
+ elif x_axis == "step":
138
+ x_column = "step"
139
+ else:
140
+ x_column = x_axis
141
+
142
+ if log_scale and x_column in df.columns:
143
+ x_vals = df[x_column]
144
+ if (x_vals <= 0).any():
145
+ df[x_column] = np.log10(np.maximum(x_vals, 0) + 1)
146
+ else:
147
+ df[x_column] = np.log10(x_vals)
148
+
149
+ if smoothing:
150
+ numeric_cols = df.select_dtypes(include="number").columns
151
+ numeric_cols = [c for c in numeric_cols if c not in utils.RESERVED_KEYS]
152
+
153
+ df_original = df.copy()
154
+ df_original["run"] = f"{run}_original"
155
+ df_original["data_type"] = "original"
156
+
157
+ df_smoothed = df.copy()
158
+ window_size = max(3, min(10, len(df) // 10)) # Adaptive window size
159
+ df_smoothed[numeric_cols] = (
160
+ df_smoothed[numeric_cols]
161
+ .rolling(window=window_size, center=True, min_periods=1)
162
+ .mean()
163
+ )
164
+ df_smoothed["run"] = f"{run}_smoothed"
165
+ df_smoothed["data_type"] = "smoothed"
166
+
167
+ combined_df = pd.concat([df_original, df_smoothed], ignore_index=True)
168
+ combined_df["x_axis"] = x_column
169
+ return combined_df, images
170
+ else:
171
+ df["run"] = run
172
+ df["data_type"] = "original"
173
+ df["x_axis"] = x_column
174
+ return df, images
175
+
176
+
177
+ def update_runs(project, filter_text, user_interacted_with_runs=False):
178
+ if project is None:
179
+ runs = []
180
+ num_runs = 0
181
+ else:
182
+ runs = get_runs(project)
183
+ num_runs = len(runs)
184
+ if filter_text:
185
+ runs = [r for r in runs if filter_text in r]
186
+ if not user_interacted_with_runs:
187
+ return gr.CheckboxGroup(choices=runs, value=runs), gr.Textbox(
188
+ label=f"Runs ({num_runs})"
189
+ )
190
+ else:
191
+ return gr.CheckboxGroup(choices=runs), gr.Textbox(label=f"Runs ({num_runs})")
192
+
193
+
194
+ def filter_runs(project, filter_text):
195
+ runs = get_runs(project)
196
+ runs = [r for r in runs if filter_text in r]
197
+ return gr.CheckboxGroup(choices=runs, value=runs)
198
+
199
+
200
+ def update_x_axis_choices(project, runs):
201
+ """Update x-axis dropdown choices based on available metrics."""
202
+ available_metrics = get_available_metrics(project, runs)
203
+ return gr.Dropdown(
204
+ label="X-axis",
205
+ choices=available_metrics,
206
+ value="step",
207
+ )
208
+
209
+
210
+ def toggle_timer(cb_value):
211
+ if cb_value:
212
+ return gr.Timer(active=True)
213
+ else:
214
+ return gr.Timer(active=False)
215
+
216
+
217
+ def check_auth(hf_token: str | None) -> None:
218
+ if os.getenv("SYSTEM") == "spaces": # if we are running in Spaces
219
+ # check auth token passed in
220
+ if hf_token is None:
221
+ raise PermissionError(
222
+ "Expected a HF_TOKEN to be provided when logging to a Space"
223
+ )
224
+ who = HfApi.whoami(hf_token)
225
+ access_token = who["auth"]["accessToken"]
226
+ owner_name = os.getenv("SPACE_AUTHOR_NAME")
227
+ repo_name = os.getenv("SPACE_REPO_NAME")
228
+ # make sure the token user is either the author of the space,
229
+ # or is a member of an org that is the author.
230
+ orgs = [o["name"] for o in who["orgs"]]
231
+ if owner_name != who["name"] and owner_name not in orgs:
232
+ raise PermissionError(
233
+ "Expected the provided hf_token to be the user owner of the space, or be a member of the org owner of the space"
234
+ )
235
+ # reject fine-grained tokens without specific repo access
236
+ if access_token["role"] == "fineGrained":
237
+ matched = False
238
+ for item in access_token["fineGrained"]["scoped"]:
239
+ if (
240
+ item["entity"]["type"] == "space"
241
+ and item["entity"]["name"] == f"{owner_name}/{repo_name}"
242
+ and "repo.write" in item["permissions"]
243
+ ):
244
+ matched = True
245
+ break
246
+ if (
247
+ (
248
+ item["entity"]["type"] == "user"
249
+ or item["entity"]["type"] == "org"
250
+ )
251
+ and item["entity"]["name"] == owner_name
252
+ and "repo.write" in item["permissions"]
253
+ ):
254
+ matched = True
255
+ break
256
+ if not matched:
257
+ raise PermissionError(
258
+ "Expected the provided hf_token with fine grained permissions to provide write access to the space"
259
+ )
260
+ # reject read-only tokens
261
+ elif access_token["role"] != "write":
262
+ raise PermissionError(
263
+ "Expected the provided hf_token to provide write permissions"
264
+ )
265
+
266
+
267
+ def upload_db_to_space(
268
+ project: str, uploaded_db: gr.FileData, hf_token: str | None
269
+ ) -> None:
270
+ check_auth(hf_token)
271
+ db_project_path = SQLiteStorage.get_project_db_path(project)
272
+ if os.path.exists(db_project_path):
273
+ raise gr.Error(
274
+ f"Trackio database file already exists for project {project}, cannot overwrite."
275
+ )
276
+ os.makedirs(os.path.dirname(db_project_path), exist_ok=True)
277
+ shutil.copy(uploaded_db["path"], db_project_path)
278
+
279
+
280
+ def bulk_upload_media(uploads: list[UploadEntry], hf_token: str | None) -> None:
281
+ check_auth(hf_token)
282
+ for upload in uploads:
283
+ media_path = FileStorage.init_project_media_path(
284
+ upload["project"], upload["run"], upload["step"]
285
+ )
286
+ shutil.copy(upload["uploaded_file"]["path"], media_path)
287
+
288
+
289
+ def log(
290
+ project: str,
291
+ run: str,
292
+ metrics: dict[str, Any],
293
+ step: int | None,
294
+ hf_token: str | None,
295
+ ) -> None:
296
+ check_auth(hf_token)
297
+ SQLiteStorage.log(project=project, run=run, metrics=metrics, step=step)
298
+
299
+
300
+ def bulk_log(
301
+ logs: list[LogEntry],
302
+ hf_token: str | None,
303
+ ) -> None:
304
+ check_auth(hf_token)
305
+
306
+ logs_by_run = {}
307
+ for log_entry in logs:
308
+ key = (log_entry["project"], log_entry["run"])
309
+ if key not in logs_by_run:
310
+ logs_by_run[key] = {"metrics": [], "steps": []}
311
+ logs_by_run[key]["metrics"].append(log_entry["metrics"])
312
+ logs_by_run[key]["steps"].append(log_entry.get("step"))
313
+
314
+ for (project, run), data in logs_by_run.items():
315
+ SQLiteStorage.bulk_log(
316
+ project=project,
317
+ run=run,
318
+ metrics_list=data["metrics"],
319
+ steps=data["steps"],
320
+ )
321
+
322
+
323
+ def filter_metrics_by_regex(metrics: list[str], filter_pattern: str) -> list[str]:
324
+ """
325
+ Filter metrics using regex pattern.
326
+
327
+ Args:
328
+ metrics: List of metric names to filter
329
+ filter_pattern: Regex pattern to match against metric names
330
+
331
+ Returns:
332
+ List of metric names that match the pattern
333
+ """
334
+ if not filter_pattern.strip():
335
+ return metrics
336
+
337
+ try:
338
+ pattern = re.compile(filter_pattern, re.IGNORECASE)
339
+ return [metric for metric in metrics if pattern.search(metric)]
340
+ except re.error:
341
+ return [
342
+ metric for metric in metrics if filter_pattern.lower() in metric.lower()
343
+ ]
344
+
345
+
346
+ def configure(request: gr.Request):
347
+ sidebar_param = request.query_params.get("sidebar")
348
+ match sidebar_param:
349
+ case "collapsed":
350
+ sidebar = gr.Sidebar(open=False, visible=True)
351
+ case "hidden":
352
+ sidebar = gr.Sidebar(open=False, visible=False)
353
+ case _:
354
+ sidebar = gr.Sidebar(open=True, visible=True)
355
+
356
+ if metrics := request.query_params.get("metrics"):
357
+ return metrics.split(","), sidebar
358
+ else:
359
+ return [], sidebar
360
+
361
+
362
+ def create_image_section(images_by_run: dict[str, dict[str, list[TrackioImage]]]):
363
+ with gr.Accordion(label="media"):
364
+ with gr.Group(elem_classes=("media-group")):
365
+ for run, images_by_key in images_by_run.items():
366
+ with gr.Tab(label=run, elem_classes=("media-tab")):
367
+ for key, images in images_by_key.items():
368
+ gr.Gallery(
369
+ [(image._pil, image.caption) for image in images],
370
+ label=key,
371
+ columns=6,
372
+ elem_classes=("media-gallery"),
373
+ )
374
+
375
+
376
+ css = """
377
+ #run-cb .wrap { gap: 2px; }
378
+ #run-cb .wrap label {
379
+ line-height: 1;
380
+ padding: 6px;
381
+ }
382
+ .logo-light { display: block; }
383
+ .logo-dark { display: none; }
384
+ .dark .logo-light { display: none; }
385
+ .dark .logo-dark { display: block; }
386
+ .dark .caption-label { color: white; }
387
+
388
+ .info-container {
389
+ position: relative;
390
+ display: inline;
391
+ }
392
+ .info-checkbox {
393
+ position: absolute;
394
+ opacity: 0;
395
+ pointer-events: none;
396
+ }
397
+ .info-icon {
398
+ border-bottom: 1px dotted;
399
+ cursor: pointer;
400
+ user-select: none;
401
+ color: var(--color-accent);
402
+ }
403
+ .info-expandable {
404
+ display: none;
405
+ opacity: 0;
406
+ transition: opacity 0.2s ease-in-out;
407
+ }
408
+ .info-checkbox:checked ~ .info-expandable {
409
+ display: inline;
410
+ opacity: 1;
411
+ }
412
+ .info-icon:hover { opacity: 0.8; }
413
+ .accent-link { font-weight: bold; }
414
+
415
+ .media-gallery { max-height: 325px; }
416
+ .media-group, .media-group > div { background: none; }
417
+ .media-group .tabs { padding: 0.5em; }
418
+ """
419
+
420
+ with gr.Blocks(theme="citrus", title="Trackio Dashboard", css=css) as demo:
421
+ with gr.Sidebar(open=False) as sidebar:
422
+ logo = gr.Markdown(
423
+ f"""
424
+ <img src='/gradio_api/file={utils.TRACKIO_LOGO_DIR}/trackio_logo_type_light_transparent.png' width='80%' class='logo-light'>
425
+ <img src='/gradio_api/file={utils.TRACKIO_LOGO_DIR}/trackio_logo_type_dark_transparent.png' width='80%' class='logo-dark'>
426
+ """
427
+ )
428
+ project_dd = gr.Dropdown(label="Project", allow_custom_value=True)
429
+ run_tb = gr.Textbox(label="Runs", placeholder="Type to filter...")
430
+ run_cb = gr.CheckboxGroup(
431
+ label="Runs", choices=[], interactive=True, elem_id="run-cb"
432
+ )
433
+ gr.HTML("<hr>")
434
+ realtime_cb = gr.Checkbox(label="Refresh metrics realtime", value=True)
435
+ smoothing_cb = gr.Checkbox(label="Smooth metrics", value=True)
436
+ x_axis_dd = gr.Dropdown(
437
+ label="X-axis",
438
+ choices=["step", "time"],
439
+ value="step",
440
+ )
441
+ log_scale_cb = gr.Checkbox(label="Log scale X-axis", value=False)
442
+ metric_filter_tb = gr.Textbox(
443
+ label="Metric Filter (regex)",
444
+ placeholder="e.g., loss|ndcg@10|gpu",
445
+ value="",
446
+ info="Filter metrics using regex patterns. Leave empty to show all metrics.",
447
+ )
448
+
449
+ timer = gr.Timer(value=1)
450
+ metrics_subset = gr.State([])
451
+ user_interacted_with_run_cb = gr.State(False)
452
+
453
+ gr.on([demo.load], fn=configure, outputs=[metrics_subset, sidebar])
454
+ gr.on(
455
+ [demo.load],
456
+ fn=get_projects,
457
+ outputs=project_dd,
458
+ show_progress="hidden",
459
+ )
460
+ gr.on(
461
+ [timer.tick],
462
+ fn=update_runs,
463
+ inputs=[project_dd, run_tb, user_interacted_with_run_cb],
464
+ outputs=[run_cb, run_tb],
465
+ show_progress="hidden",
466
+ )
467
+ gr.on(
468
+ [timer.tick],
469
+ fn=lambda: gr.Dropdown(info=get_project_info()),
470
+ outputs=[project_dd],
471
+ show_progress="hidden",
472
+ )
473
+ gr.on(
474
+ [demo.load, project_dd.change],
475
+ fn=update_runs,
476
+ inputs=[project_dd, run_tb],
477
+ outputs=[run_cb, run_tb],
478
+ show_progress="hidden",
479
+ )
480
+ gr.on(
481
+ [demo.load, project_dd.change, run_cb.change],
482
+ fn=update_x_axis_choices,
483
+ inputs=[project_dd, run_cb],
484
+ outputs=x_axis_dd,
485
+ show_progress="hidden",
486
+ )
487
+
488
+ realtime_cb.change(
489
+ fn=toggle_timer,
490
+ inputs=realtime_cb,
491
+ outputs=timer,
492
+ api_name="toggle_timer",
493
+ )
494
+ run_cb.input(
495
+ fn=lambda: True,
496
+ outputs=user_interacted_with_run_cb,
497
+ )
498
+ run_tb.input(
499
+ fn=filter_runs,
500
+ inputs=[project_dd, run_tb],
501
+ outputs=run_cb,
502
+ )
503
+
504
+ gr.api(
505
+ fn=upload_db_to_space,
506
+ api_name="upload_db_to_space",
507
+ )
508
+ gr.api(
509
+ fn=bulk_upload_media,
510
+ api_name="bulk_upload_media",
511
+ )
512
+ gr.api(
513
+ fn=log,
514
+ api_name="log",
515
+ )
516
+ gr.api(
517
+ fn=bulk_log,
518
+ api_name="bulk_log",
519
+ )
520
+
521
+ x_lim = gr.State(None)
522
+ last_steps = gr.State({})
523
+
524
+ def update_x_lim(select_data: gr.SelectData):
525
+ return select_data.index
526
+
527
+ def update_last_steps(project, runs):
528
+ """Update the last step from all runs to detect when new data is available."""
529
+ if not project or not runs:
530
+ return {}
531
+
532
+ return SQLiteStorage.get_max_steps_for_runs(project, runs)
533
+
534
+ timer.tick(
535
+ fn=update_last_steps,
536
+ inputs=[project_dd, run_cb],
537
+ outputs=last_steps,
538
+ show_progress="hidden",
539
+ )
540
+
541
+ @gr.render(
542
+ triggers=[
543
+ demo.load,
544
+ run_cb.change,
545
+ last_steps.change,
546
+ smoothing_cb.change,
547
+ x_lim.change,
548
+ x_axis_dd.change,
549
+ log_scale_cb.change,
550
+ metric_filter_tb.change,
551
+ ],
552
+ inputs=[
553
+ project_dd,
554
+ run_cb,
555
+ smoothing_cb,
556
+ metrics_subset,
557
+ x_lim,
558
+ x_axis_dd,
559
+ log_scale_cb,
560
+ metric_filter_tb,
561
+ ],
562
+ show_progress="hidden",
563
+ )
564
+ def update_dashboard(
565
+ project,
566
+ runs,
567
+ smoothing,
568
+ metrics_subset,
569
+ x_lim_value,
570
+ x_axis,
571
+ log_scale,
572
+ metric_filter,
573
+ ):
574
+ dfs = []
575
+ images_by_run = {}
576
+ original_runs = runs.copy()
577
+
578
+ for run in runs:
579
+ df, images_by_key = load_run_data(
580
+ project, run, smoothing, x_axis, log_scale
581
+ )
582
+ if df is not None:
583
+ dfs.append(df)
584
+ images_by_run[run] = images_by_key
585
+ if dfs:
586
+ master_df = pd.concat(dfs, ignore_index=True)
587
+ else:
588
+ master_df = pd.DataFrame()
589
+
590
+ if master_df.empty:
591
+ return
592
+
593
+ x_column = "step"
594
+ if dfs and not dfs[0].empty and "x_axis" in dfs[0].columns:
595
+ x_column = dfs[0]["x_axis"].iloc[0]
596
+
597
+ numeric_cols = master_df.select_dtypes(include="number").columns
598
+ numeric_cols = [c for c in numeric_cols if c not in utils.RESERVED_KEYS]
599
+ if metrics_subset:
600
+ numeric_cols = [c for c in numeric_cols if c in metrics_subset]
601
+
602
+ if metric_filter and metric_filter.strip():
603
+ numeric_cols = filter_metrics_by_regex(list(numeric_cols), metric_filter)
604
+
605
+ nested_metric_groups = utils.group_metrics_with_subprefixes(list(numeric_cols))
606
+ color_map = utils.get_color_mapping(original_runs, smoothing)
607
+
608
+ metric_idx = 0
609
+ for group_name in sorted(nested_metric_groups.keys()):
610
+ group_data = nested_metric_groups[group_name]
611
+
612
+ with gr.Accordion(
613
+ label=group_name,
614
+ open=True,
615
+ key=f"accordion-{group_name}",
616
+ preserved_by_key=["value", "open"],
617
+ ):
618
+ # Render direct metrics at this level
619
+ if group_data["direct_metrics"]:
620
+ with gr.Row(key=f"row-{group_name}-direct"):
621
+ for metric_name in group_data["direct_metrics"]:
622
+ metric_df = master_df.dropna(subset=[metric_name])
623
+ color = "run" if "run" in metric_df.columns else None
624
+ if not metric_df.empty:
625
+ plot = gr.LinePlot(
626
+ utils.downsample(
627
+ metric_df,
628
+ x_column,
629
+ metric_name,
630
+ color,
631
+ x_lim_value,
632
+ ),
633
+ x=x_column,
634
+ y=metric_name,
635
+ y_title=metric_name.split("/")[-1],
636
+ color=color,
637
+ color_map=color_map,
638
+ title=metric_name,
639
+ key=f"plot-{metric_idx}",
640
+ preserved_by_key=None,
641
+ x_lim=x_lim_value,
642
+ show_fullscreen_button=True,
643
+ min_width=400,
644
+ )
645
+ plot.select(
646
+ update_x_lim,
647
+ outputs=x_lim,
648
+ key=f"select-{metric_idx}",
649
+ )
650
+ plot.double_click(
651
+ lambda: None,
652
+ outputs=x_lim,
653
+ key=f"double-{metric_idx}",
654
+ )
655
+ metric_idx += 1
656
+
657
+ # If there are subgroups, create nested accordions
658
+ if group_data["subgroups"]:
659
+ for subgroup_name in sorted(group_data["subgroups"].keys()):
660
+ subgroup_metrics = group_data["subgroups"][subgroup_name]
661
+
662
+ with gr.Accordion(
663
+ label=subgroup_name,
664
+ open=True,
665
+ key=f"accordion-{group_name}-{subgroup_name}",
666
+ preserved_by_key=["value", "open"],
667
+ ):
668
+ with gr.Row(key=f"row-{group_name}-{subgroup_name}"):
669
+ for metric_name in subgroup_metrics:
670
+ metric_df = master_df.dropna(subset=[metric_name])
671
+ color = (
672
+ "run" if "run" in metric_df.columns else None
673
+ )
674
+ if not metric_df.empty:
675
+ plot = gr.LinePlot(
676
+ utils.downsample(
677
+ metric_df,
678
+ x_column,
679
+ metric_name,
680
+ color,
681
+ x_lim_value,
682
+ ),
683
+ x=x_column,
684
+ y=metric_name,
685
+ y_title=metric_name.split("/")[-1],
686
+ color=color,
687
+ color_map=color_map,
688
+ title=metric_name,
689
+ key=f"plot-{metric_idx}",
690
+ preserved_by_key=None,
691
+ x_lim=x_lim_value,
692
+ show_fullscreen_button=True,
693
+ min_width=400,
694
+ )
695
+ plot.select(
696
+ update_x_lim,
697
+ outputs=x_lim,
698
+ key=f"select-{metric_idx}",
699
+ )
700
+ plot.double_click(
701
+ lambda: None,
702
+ outputs=x_lim,
703
+ key=f"double-{metric_idx}",
704
+ )
705
+ metric_idx += 1
706
+ if images_by_run and any(any(images) for images in images_by_run.values()):
707
+ create_image_section(images_by_run)
708
+
709
+
710
+ if __name__ == "__main__":
711
+ demo.launch(allowed_paths=[utils.TRACKIO_LOGO_DIR], show_api=False, show_error=True)
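Besides serving this file on a Space, the same dashboard can be opened locally; a hedged sketch using the entry point printed by print_dashboard_instructions in utils.py (project name is illustrative):

import trackio

# Opens the Gradio dashboard for a local project; equivalent to the CLI form:
#   trackio show --project "demo-project"
trackio.show(project="demo-project")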
utils.py ADDED
@@ -0,0 +1,568 @@
1
+ import re
2
+ import sys
3
+ import time
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ import huggingface_hub
8
+ import numpy as np
9
+ import pandas as pd
10
+ from huggingface_hub.constants import HF_HOME
11
+
12
+ if TYPE_CHECKING:
13
+ from trackio.commit_scheduler import CommitScheduler
14
+ from trackio.dummy_commit_scheduler import DummyCommitScheduler
15
+
16
+ RESERVED_KEYS = ["project", "run", "timestamp", "step", "time", "metrics"]
17
+ TRACKIO_DIR = Path(HF_HOME) / "trackio"
18
+
19
+ TRACKIO_LOGO_DIR = Path(__file__).parent / "assets"
20
+
21
+
22
+ def generate_readable_name(used_names: list[str], space_id: str | None = None) -> str:
23
+ """
24
+ Generates a random, readable name like "dainty-sunset-0".
25
+ If space_id is provided, generates username-timestamp format instead.
26
+ """
27
+ if space_id is not None:
28
+ username = huggingface_hub.whoami()["name"]
29
+ timestamp = int(time.time())
30
+ return f"{username}-{timestamp}"
31
+ adjectives = [
32
+ "dainty",
33
+ "brave",
34
+ "calm",
35
+ "eager",
36
+ "fancy",
37
+ "gentle",
38
+ "happy",
39
+ "jolly",
40
+ "kind",
41
+ "lively",
42
+ "merry",
43
+ "nice",
44
+ "proud",
45
+ "quick",
46
+ "hugging",
47
+ "silly",
48
+ "tidy",
49
+ "witty",
50
+ "zealous",
51
+ "bright",
52
+ "shy",
53
+ "bold",
54
+ "clever",
55
+ "daring",
56
+ "elegant",
57
+ "faithful",
58
+ "graceful",
59
+ "honest",
60
+ "inventive",
61
+ "jovial",
62
+ "keen",
63
+ "lucky",
64
+ "modest",
65
+ "noble",
66
+ "optimistic",
67
+ "patient",
68
+ "quirky",
69
+ "resourceful",
70
+ "sincere",
71
+ "thoughtful",
72
+ "upbeat",
73
+ "valiant",
74
+ "warm",
75
+ "youthful",
76
+ "zesty",
77
+ "adventurous",
78
+ "breezy",
79
+ "cheerful",
80
+ "delightful",
81
+ "energetic",
82
+ "fearless",
83
+ "glad",
84
+ "hopeful",
85
+ "imaginative",
86
+ "joyful",
87
+ "kindly",
88
+ "luminous",
89
+ "mysterious",
90
+ "neat",
91
+ "outgoing",
92
+ "playful",
93
+ "radiant",
94
+ "spirited",
95
+ "tranquil",
96
+ "unique",
97
+ "vivid",
98
+ "wise",
99
+ "zany",
100
+ "artful",
101
+ "bubbly",
102
+ "charming",
103
+ "dazzling",
104
+ "earnest",
105
+ "festive",
106
+ "gentlemanly",
107
+ "hearty",
108
+ "intrepid",
109
+ "jubilant",
110
+ "knightly",
111
+ "lively",
112
+ "magnetic",
113
+ "nimble",
114
+ "orderly",
115
+ "peaceful",
116
+ "quick-witted",
117
+ "robust",
118
+ "sturdy",
119
+ "trusty",
120
+ "upstanding",
121
+ "vibrant",
122
+ "whimsical",
123
+ ]
124
+ nouns = [
125
+ "sunset",
126
+ "forest",
127
+ "river",
128
+ "mountain",
129
+ "breeze",
130
+ "meadow",
131
+ "ocean",
132
+ "valley",
133
+ "sky",
134
+ "field",
135
+ "cloud",
136
+ "star",
137
+ "rain",
138
+ "leaf",
139
+ "stone",
140
+ "flower",
141
+ "bird",
142
+ "tree",
143
+ "wave",
144
+ "trail",
145
+ "island",
146
+ "desert",
147
+ "hill",
148
+ "lake",
149
+ "pond",
150
+ "grove",
151
+ "canyon",
152
+ "reef",
153
+ "bay",
154
+ "peak",
155
+ "glade",
156
+ "marsh",
157
+ "cliff",
158
+ "dune",
159
+ "spring",
160
+ "brook",
161
+ "cave",
162
+ "plain",
163
+ "ridge",
164
+ "wood",
165
+ "blossom",
166
+ "petal",
167
+ "root",
168
+ "branch",
169
+ "seed",
170
+ "acorn",
171
+ "pine",
172
+ "willow",
173
+ "cedar",
174
+ "elm",
175
+ "falcon",
176
+ "eagle",
177
+ "sparrow",
178
+ "robin",
179
+ "owl",
180
+ "finch",
181
+ "heron",
182
+ "crane",
183
+ "duck",
184
+ "swan",
185
+ "fox",
186
+ "wolf",
187
+ "bear",
188
+ "deer",
189
+ "moose",
190
+ "otter",
191
+ "beaver",
192
+ "lynx",
193
+ "hare",
194
+ "badger",
195
+ "butterfly",
196
+ "bee",
197
+ "ant",
198
+ "beetle",
199
+ "dragonfly",
200
+ "firefly",
201
+ "ladybug",
202
+ "moth",
203
+ "spider",
204
+ "worm",
205
+ "coral",
206
+ "kelp",
207
+ "shell",
208
+ "pebble",
209
+ "face",
210
+ "boulder",
211
+ "cobble",
212
+ "sand",
213
+ "wavelet",
214
+ "tide",
215
+ "current",
216
+ "mist",
217
+ ]
218
+ number = 0
219
+ name = f"{adjectives[0]}-{nouns[0]}-{number}"
220
+ while name in used_names:
221
+ number += 1
222
+ adjective = adjectives[number % len(adjectives)]
223
+ noun = nouns[number % len(nouns)]
224
+ name = f"{adjective}-{noun}-{number}"
225
+ return name
226
+
227
+
228
+ def block_except_in_notebook():
229
+ in_notebook = bool(getattr(sys, "ps1", sys.flags.interactive))
230
+ if in_notebook:
231
+ return
232
+ try:
233
+ while True:
234
+ time.sleep(0.1)
235
+ except (KeyboardInterrupt, OSError):
236
+ print("Keyboard interruption in main thread... closing dashboard.")
237
+
238
+
239
+ def simplify_column_names(columns: list[str]) -> dict[str, str]:
240
+ """
241
+ Simplifies column names to first 10 alphanumeric or "/" characters with unique suffixes.
242
+
243
+ Args:
244
+ columns: List of original column names
245
+
246
+ Returns:
247
+ Dictionary mapping original column names to simplified names
248
+ """
249
+ simplified_names = {}
250
+ used_names = set()
251
+
252
+ for col in columns:
253
+ alphanumeric = re.sub(r"[^a-zA-Z0-9/]", "", col)
254
+ base_name = alphanumeric[:10] if alphanumeric else f"col_{len(used_names)}"
255
+
256
+ final_name = base_name
257
+ suffix = 1
258
+ while final_name in used_names:
259
+ final_name = f"{base_name}_{suffix}"
260
+ suffix += 1
261
+
262
+ simplified_names[col] = final_name
263
+ used_names.add(final_name)
264
+
265
+ return simplified_names
266
+
267
+
268
+ def print_dashboard_instructions(project: str) -> None:
269
+ """
270
+ Prints instructions for viewing the Trackio dashboard.
271
+
272
+ Args:
273
+ project: The name of the project to show dashboard for.
274
+ """
275
+ YELLOW = "\033[93m"
276
+ BOLD = "\033[1m"
277
+ RESET = "\033[0m"
278
+
279
+ print("* View dashboard by running in your terminal:")
280
+ print(f'{BOLD}{YELLOW}trackio show --project "{project}"{RESET}')
281
+ print(f'* or by running in Python: trackio.show(project="{project}")')
282
+
283
+
284
+ def preprocess_space_and_dataset_ids(
285
+ space_id: str | None, dataset_id: str | None
286
+ ) -> tuple[str | None, str | None]:
287
+ if space_id is not None and "/" not in space_id:
288
+ username = huggingface_hub.whoami()["name"]
289
+ space_id = f"{username}/{space_id}"
290
+ if dataset_id is not None and "/" not in dataset_id:
291
+ username = huggingface_hub.whoami()["name"]
292
+ dataset_id = f"{username}/{dataset_id}"
293
+ if space_id is not None and dataset_id is None:
294
+ dataset_id = f"{space_id}-dataset"
295
+ return space_id, dataset_id
296
+
297
+
298
+ def fibo():
299
+ """Generator for Fibonacci backoff: 1, 1, 2, 3, 5, 8, ..."""
300
+ a, b = 1, 1
301
+ while True:
302
+ yield a
303
+ a, b = b, a + b
304
+
305
+
306
+ COLOR_PALETTE = [
307
+ "#3B82F6",
308
+ "#EF4444",
309
+ "#10B981",
310
+ "#F59E0B",
311
+ "#8B5CF6",
312
+ "#EC4899",
313
+ "#06B6D4",
314
+ "#84CC16",
315
+ "#F97316",
316
+ "#6366F1",
317
+ ]
318
+
319
+
320
+ def get_color_mapping(runs: list[str], smoothing: bool) -> dict[str, str]:
321
+ """Generate color mapping for runs, with transparency for original data when smoothing is enabled."""
322
+ color_map = {}
323
+
324
+ for i, run in enumerate(runs):
325
+ base_color = COLOR_PALETTE[i % len(COLOR_PALETTE)]
326
+
327
+ if smoothing:
328
+ color_map[f"{run}_smoothed"] = base_color
329
+ color_map[f"{run}_original"] = base_color + "4D"
330
+ else:
331
+ color_map[run] = base_color
332
+
333
+ return color_map
334
+
335
+
336
+ def downsample(
337
+ df: pd.DataFrame,
338
+ x: str,
339
+ y: str,
340
+ color: str | None,
341
+ x_lim: tuple[float, float] | None = None,
342
+ ) -> pd.DataFrame:
343
+ if df.empty:
344
+ return df
345
+
346
+ columns_to_keep = [x, y]
347
+ if color is not None and color in df.columns:
348
+ columns_to_keep.append(color)
349
+ df = df[columns_to_keep].copy()
350
+
351
+ n_bins = 100
352
+
353
+ if color is not None and color in df.columns:
354
+ groups = df.groupby(color)
355
+ else:
356
+ groups = [(None, df)]
357
+
358
+ downsampled_indices = []
359
+
360
+ for _, group_df in groups:
361
+ if group_df.empty:
362
+ continue
363
+
364
+ group_df = group_df.sort_values(x)
365
+
366
+ if x_lim is not None:
367
+ x_min, x_max = x_lim
368
+ before_point = group_df[group_df[x] < x_min].tail(1)
369
+ after_point = group_df[group_df[x] > x_max].head(1)
370
+ group_df = group_df[(group_df[x] >= x_min) & (group_df[x] <= x_max)]
371
+ else:
372
+ before_point = after_point = None
373
+ x_min = group_df[x].min()
374
+ x_max = group_df[x].max()
375
+
376
+ if before_point is not None and not before_point.empty:
377
+ downsampled_indices.extend(before_point.index.tolist())
378
+ if after_point is not None and not after_point.empty:
379
+ downsampled_indices.extend(after_point.index.tolist())
380
+
381
+ if group_df.empty:
382
+ continue
383
+
384
+ if x_min == x_max:
385
+ min_y_idx = group_df[y].idxmin()
386
+ max_y_idx = group_df[y].idxmax()
387
+ if min_y_idx != max_y_idx:
388
+ downsampled_indices.extend([min_y_idx, max_y_idx])
389
+ else:
390
+ downsampled_indices.append(min_y_idx)
391
+ continue
392
+
393
+ if len(group_df) < 500:
394
+ downsampled_indices.extend(group_df.index.tolist())
395
+ continue
396
+
397
+ bins = np.linspace(x_min, x_max, n_bins + 1)
398
+ group_df["bin"] = pd.cut(
399
+ group_df[x], bins=bins, labels=False, include_lowest=True
400
+ )
401
+
402
+ for bin_idx in group_df["bin"].dropna().unique():
403
+ bin_data = group_df[group_df["bin"] == bin_idx]
404
+ if bin_data.empty:
405
+ continue
406
+
407
+ min_y_idx = bin_data[y].idxmin()
408
+ max_y_idx = bin_data[y].idxmax()
409
+
410
+ downsampled_indices.append(min_y_idx)
411
+ if min_y_idx != max_y_idx:
412
+ downsampled_indices.append(max_y_idx)
413
+
414
+ unique_indices = list(set(downsampled_indices))
415
+
416
+ downsampled_df = df.loc[unique_indices].copy()
417
+ downsampled_df = downsampled_df.sort_values(x).reset_index(drop=True)
418
+ downsampled_df = downsampled_df.drop(columns=["bin"], errors="ignore")
419
+
420
+ return downsampled_df
421
+
422
+
423
+ def sort_metrics_by_prefix(metrics: list[str]) -> list[str]:
424
+ """
425
+ Sort metrics by grouping prefixes together for dropdown/list display.
426
+ Metrics without prefixes come first, then grouped by prefix.
427
+
428
+ Args:
429
+ metrics: List of metric names
430
+
431
+ Returns:
432
+ List of metric names sorted by prefix
433
+
434
+ Example:
435
+ Input: ["train/loss", "loss", "train/acc", "val/loss"]
436
+ Output: ["loss", "train/acc", "train/loss", "val/loss"]
437
+ """
438
+ groups = group_metrics_by_prefix(metrics)
439
+ result = []
440
+
441
+ if "charts" in groups:
442
+ result.extend(groups["charts"])
443
+
444
+ for group_name in sorted(groups.keys()):
445
+ if group_name != "charts":
446
+ result.extend(groups[group_name])
447
+
448
+ return result
449
+
450
+
451
+ def group_metrics_by_prefix(metrics: list[str]) -> dict[str, list[str]]:
452
+ """
453
+ Group metrics by their prefix. Metrics without prefix go to 'charts' group.
454
+
455
+ Args:
456
+ metrics: List of metric names
457
+
458
+ Returns:
459
+ Dictionary with prefix names as keys and lists of metrics as values
460
+
461
+ Example:
462
+ Input: ["loss", "accuracy", "train/loss", "train/acc", "val/loss"]
463
+ Output: {
464
+ "charts": ["loss", "accuracy"],
465
+ "train": ["train/loss", "train/acc"],
466
+ "val": ["val/loss"]
467
+ }
468
+ """
469
+ no_prefix = []
470
+ with_prefix = []
471
+
472
+ for metric in metrics:
473
+ if "/" in metric:
474
+ with_prefix.append(metric)
475
+ else:
476
+ no_prefix.append(metric)
477
+
478
+ no_prefix.sort()
479
+
480
+ prefix_groups = {}
481
+ for metric in with_prefix:
482
+ prefix = metric.split("/")[0]
483
+ if prefix not in prefix_groups:
484
+ prefix_groups[prefix] = []
485
+ prefix_groups[prefix].append(metric)
486
+
487
+ for prefix in prefix_groups:
488
+ prefix_groups[prefix].sort()
489
+
490
+ groups = {}
491
+ if no_prefix:
492
+ groups["charts"] = no_prefix
493
+
494
+ for prefix in sorted(prefix_groups.keys()):
495
+ groups[prefix] = prefix_groups[prefix]
496
+
497
+ return groups
498
+
499
+
500
+ def group_metrics_with_subprefixes(metrics: list[str]) -> dict:
501
+ """
502
+ Group metrics with simple 2-level nested structure detection.
503
+
504
+ Returns a dictionary where each prefix group can have:
505
+ - direct_metrics: list of metrics at this level (e.g., "train/acc")
506
+ - subgroups: dict of subgroup name -> list of metrics (e.g., "loss" -> ["train/loss/norm", "train/loss/unnorm"])
507
+
508
+ Example:
509
+ Input: ["loss", "train/acc", "train/loss/normalized", "train/loss/unnormalized", "val/loss"]
510
+ Output: {
511
+ "charts": {
512
+ "direct_metrics": ["loss"],
513
+ "subgroups": {}
514
+ },
515
+ "train": {
516
+ "direct_metrics": ["train/acc"],
517
+ "subgroups": {
518
+ "loss": ["train/loss/normalized", "train/loss/unnormalized"]
519
+ }
520
+ },
521
+ "val": {
522
+ "direct_metrics": ["val/loss"],
523
+ "subgroups": {}
524
+ }
525
+ }
526
+ """
527
+ result = {}
528
+
529
+ for metric in metrics:
530
+ if "/" not in metric:
531
+ if "charts" not in result:
532
+ result["charts"] = {"direct_metrics": [], "subgroups": {}}
533
+ result["charts"]["direct_metrics"].append(metric)
534
+ else:
535
+ parts = metric.split("/")
536
+ main_prefix = parts[0]
537
+
538
+ if main_prefix not in result:
539
+ result[main_prefix] = {"direct_metrics": [], "subgroups": {}}
540
+
541
+ if len(parts) == 2:
542
+ result[main_prefix]["direct_metrics"].append(metric)
543
+ else:
544
+ subprefix = parts[1]
545
+ if subprefix not in result[main_prefix]["subgroups"]:
546
+ result[main_prefix]["subgroups"][subprefix] = []
547
+ result[main_prefix]["subgroups"][subprefix].append(metric)
548
+
549
+ for group_data in result.values():
550
+ group_data["direct_metrics"].sort()
551
+ for subgroup_metrics in group_data["subgroups"].values():
552
+ subgroup_metrics.sort()
553
+
554
+ if "charts" in result and not result["charts"]["direct_metrics"]:
555
+ del result["charts"]
556
+
557
+ return result
558
+
559
+
560
+ def get_sync_status(scheduler: "CommitScheduler | DummyCommitScheduler") -> int | None:
561
+ """Get the sync status from the CommitScheduler in an integer number of minutes, or None if not synced yet."""
562
+ if getattr(
563
+ scheduler, "last_push_time", None
564
+ ): # DummyCommitScheduler doesn't have last_push_time
565
+ time_diff = time.time() - scheduler.last_push_time
566
+ return int(time_diff / 60)
567
+ else:
568
+ return None
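A brief hedged example of the grouping and color helpers above (assuming the package is importable as trackio; the expected outputs follow COLOR_PALETTE and the docstrings):

from trackio import utils

print(utils.get_color_mapping(["run-1"], smoothing=True))
# {'run-1_smoothed': '#3B82F6', 'run-1_original': '#3B82F64D'}

groups = utils.group_metrics_with_subprefixes(
    ["loss", "train/acc", "train/loss/normalized", "val/loss"]
)
print(groups["train"]["subgroups"]["loss"])  # ['train/loss/normalized']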
version.txt ADDED
@@ -0,0 +1 @@
1
+ 0.2.9