Hermit11 committed on
Commit
6d6abcb
·
verified ·
1 Parent(s): a66ad68

Upload 7 files

Browse files
Files changed (7) hide show
  1. .gitattributes +35 -0
  2. .gitignore +17 -0
  3. README.md +139 -0
  4. app.py +279 -0
  5. config.yaml +9 -0
  6. pyproject.toml +12 -0
  7. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment and configuration files
2
+ .env
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.pyc
7
+
8
+ # Cache and local files
9
+ .cache/
10
+ .local/
11
+ .upm/
12
+
13
+ # Replit
14
+ replit.nix
15
+
16
+ # OS files
17
+ .DS_Store
README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Alpha9 Miners Dashboard
3
+ emoji: 🧠
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: 1.28.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Alpha9 Training Dashboard 🧠
13
+
14
+ Real-time monitoring dashboard for the Alpha9 Bittensor network, displaying training metrics and performance data from decentralized AI training operations.
15
+
16
+ You can find the dashboard here: [Hermit11/A9-Dashboard](https://huggingface.co/spaces/Hermit11/A9-Dashboard).
17
+
18
+ ## Features
19
+ - Real-time training progress monitoring
20
+ - Historical analysis of training metrics
21
+ - Miner performance rankings and geographical distribution
22
+ - Network status overview
23
+ - Auto-refreshing metrics
24
+
25
+ ## System Requirements
26
+ - Python 3.8+
27
+ - 2GB RAM minimum
28
+ - Internet connection for real-time updates
29
+ - Hugging Face account and API token
30
+
31
+ ## Getting Started
32
+
33
+ ### Prerequisites
34
+
35
+ 1. Get a Hugging Face Account and Token:
36
+ - Create an account at [Hugging Face](https://huggingface.co/)
37
+ - Generate an access token from [Settings → Access Tokens](https://huggingface.co/settings/tokens)
38
+ - Make sure you have read access to the metrics repository
39
+
40
+ 2. Clone the repository:
41
+ ```bash
42
+ git clone https://github.com/bigideainc/A9Labs-Dashboard.git
43
+ cd A9Labs-Dashboard
44
+ ```
45
+
46
+ 3. Set up your Python environment:
47
+ ```bash
48
+ python -m venv venv
49
+ source venv/bin/activate # On Windows: venv\Scripts\activate
50
+ ```
51
+
52
+ 4. Install dependencies:
53
+ ```bash
54
+ pip install -r requirements.txt
55
+ ```
56
+
57
+ ### Configuration
58
+
59
+ 1. Create a `.env` file in the project root:
60
+ ```bash
61
+ HF_TOKEN="your_hugging_face_token_here"
62
+ CENTRAL_REPO="Tobius/yogpt_test" # or your metrics repository
63
+ ```
64
+
65
+ ### Running Locally
66
+
67
+ 1. Start the dashboard:
68
+ ```bash
69
+ streamlit run app.py
70
+ ```
71
+
72
+ 2. Access the dashboard in your browser:
73
+ - The dashboard will automatically open at `http://localhost:8501`
74
+ - For remote access, use the network URL provided in the terminal
75
+
76
+ ## Dashboard Sections
77
+
78
+ ### Training Progress
79
+ - Overall progress bar showing completion percentage
80
+ - Total tokens processed
81
+ - Target token goal
82
+
83
+ ### Training Metrics
84
+ - Loss curves
85
+ - Perplexity measurements
86
+ - Tokens per second performance
87
+ - Learning rate adaptation
88
+
89
+ ### Network Overview
90
+ - Active miners leaderboard
91
+ - Geographical distribution map
92
+ - Real-time status indicators
93
+
94
+ ## Development
95
+
96
+ ### Project Structure
97
+ ```
98
+ A9-Dashboard/
99
+ ├── app.py # Main dashboard application
100
+ ├── utils/
101
+ │ └── HFManager.py # Hugging Face integration utilities
102
+ ├── requirements.txt # Project dependencies
103
+ └── .env # Environment configuration
104
+ ```
105
+
106
+ ### Contributing
107
+ 1. Fork the repository
108
+ 2. Create a feature branch
109
+ 3. Commit your changes
110
+ 4. Push to the branch
111
+ 5. Create a Pull Request
112
+
113
+ ## Accessing the Hosted Dashboard
114
+
115
+ The dashboard is hosted as a Hugging Face Space at [Hermit11/A9-Dashboard](https://huggingface.co/spaces/Hermit11/A9-Dashboard).
116
+
117
+ ### Authentication
118
+ - No authentication required for viewing
119
+ - HF token required for deployment and modifications
120
+
121
+ ## Troubleshooting
122
+
123
+ ### Common Issues
124
+
125
+ 1. "No Hugging Face token found":
126
+ - Ensure your `.env` file contains a valid `HF_TOKEN`
127
+ - Check token permissions on Hugging Face
128
+
129
+ 2. "Cannot connect to metrics repository":
130
+ - Verify repository access permissions
131
+ - Check internet connection
132
+ - Confirm repository name in `.env`
133
+
134
+ ### Support
135
+ - Create an issue in the GitHub repository
136
+ - Contact the development team through [GitHub Issues](https://github.com/bigideainc/A9Labs-Dashboard/issues)
137
+
138
+ ## License
139
+ This project is licensed under the MIT License - see the LICENSE file for details.
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ from datetime import datetime
4
+ import logging
5
+ from utils.HFManager import fetch_training_metrics_commits
6
+ import pandas as pd
7
+ import os
8
+ from dotenv import load_dotenv
9
+ import plotly.graph_objects as go
10
+ import pydeck as pdk
11
+
12
# Read settings from a local .env file into the process environment.
load_dotenv()

# Root logger: INFO and above, each record prefixed with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Browser-tab title/icon, wide layout, and the entries of the "⋮" app menu.
st.set_page_config(
    page_title="Alpha9 Miner Dashboard",
    page_icon="🧠",
    layout="wide",
    menu_items={
        'Get Help': 'https://github.com/Alpha9-Omega/YoGPT',
        'Report a bug': "https://github.com/Alpha9-Omega/YoGPT/issues",
        'About': "Dashboard for monitoring Alpha9 Bittensor and Commune miners",
    },
)

# Inject custom CSS: purple gradient for the progress bar and dark rounded
# panels for the metric / plot containers.
st.markdown("""
<style>
.stProgress > div > div > div > div {
background-image: linear-gradient(to right, #9146FF, #784CBD);
}
.metric-container {
background-color: #262730;
padding: 1rem;
border-radius: 0.5rem;
}
.plot-container {
background-color: #262730;
border-radius: 0.5rem;
padding: 1rem;
}
</style>
""", unsafe_allow_html=True)
47
+
48
+ class MetricsManager:
49
+ def __init__(self, repo_name, token):
50
+ if not repo_name:
51
+ raise ValueError("Repository name is required")
52
+ if not token:
53
+ raise ValueError("Hugging Face token is required")
54
+
55
+ self.repo_name = repo_name
56
+ self.token = token
57
+ self.last_update = None
58
+ self.metrics_cache = []
59
+ self.update_interval = 60 # seconds
60
+ logging.info(f"MetricsManager initialized for repo: {repo_name}")
61
+
62
+ def needs_update(self):
63
+ if not self.last_update:
64
+ return True
65
+ return (datetime.now() - self.last_update).total_seconds() > self.update_interval
66
+
67
+ def fetch_latest_metrics(self):
68
+ if self.needs_update():
69
+ logging.info("Fetching fresh metrics from HuggingFace...")
70
+ try:
71
+ self.metrics_cache = fetch_training_metrics_commits(self.repo_name, token=self.token)
72
+ self.last_update = datetime.now()
73
+ logging.info(f"Fetched {len(self.metrics_cache)} metrics entries")
74
+ except Exception as e:
75
+ logging.error(f"Error fetching metrics: {str(e)}")
76
+ return []
77
+ return self.metrics_cache
78
+
79
+ def get_latest_job_metrics(self):
80
+ metrics = self.fetch_latest_metrics()
81
+ if not metrics:
82
+ return None
83
+
84
+ # Group metrics by job_id
85
+ jobs = {}
86
+ for entry in metrics:
87
+ job_id = entry['metrics']['job_id']
88
+ if job_id not in jobs:
89
+ jobs[job_id] = []
90
+ jobs[job_id].append(entry)
91
+
92
+ # Get the latest job
93
+ latest_job_id = max(jobs.keys())
94
+ return jobs[latest_job_id]
95
+
96
+ def get_historical_metrics(self):
97
+ metrics = self.fetch_latest_metrics()
98
+ if not metrics:
99
+ return pd.DataFrame()
100
+
101
+ records = []
102
+ for entry in metrics:
103
+ record = {
104
+ 'timestamp': entry['timestamp'],
105
+ 'miner_uid': entry['miner_uid'],
106
+ 'job_id': entry['metrics']['job_id'],
107
+ 'final_loss': entry['metrics'].get('final_loss', None),
108
+ 'perplexity': entry['metrics'].get('perplexity', None),
109
+ 'tokens_per_second': entry['metrics'].get('tokens_per_second', None),
110
+ 'inner_lr': entry['metrics'].get('inner_lr', None),
111
+ 'location': entry.get('location', 'Unknown'),
112
+ 'model_repo': entry['model_repo']
113
+ }
114
+ records.append(record)
115
+
116
+ df = pd.DataFrame(records)
117
+ try:
118
+ df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y%m%d_%H%M%S')
119
+ except ValueError:
120
+ try:
121
+ df['timestamp'] = pd.to_datetime(df['timestamp'], format='mixed')
122
+ except:
123
+ st.warning("Could not parse some timestamp values")
124
+
125
+ return df.sort_values('timestamp')
126
+
127
def _get_setting(name, default=None):
    """Return setting ``name`` from Streamlit secrets, else from the environment.

    Any ordinary error while reading ``st.secrets`` (missing key, no secrets
    store, ...) falls through to ``os.getenv(name, default)``. ``Exception``
    is caught rather than using a bare ``except:`` so that ``BaseException``
    subclasses such as ``KeyboardInterrupt`` and ``SystemExit`` still
    propagate.
    """
    try:
        return st.secrets[name]
    except Exception:
        return os.getenv(name, default)


# Get configuration
hf_token = _get_setting("HF_TOKEN")
central_repo = _get_setting("CENTRAL_REPO", "Tobius/yogpt_test")

# Without a token nothing can be fetched: show the error and end this run.
if not hf_token:
    st.error("No Hugging Face token found. Please set HF_TOKEN in environment variables.")
    st.stop()

# Initialize metrics manager once per browser session so its cache survives
# the script re-runs triggered by the auto-refresh.
if 'metrics_manager' not in st.session_state:
    st.session_state.metrics_manager = MetricsManager(central_repo, hf_token)
145
+
146
# Dashboard UI
st.title("🧠 Alpha9 Training Dashboard")

# Progress Bar Section - rendered only when the latest job has entries.
latest_metrics = st.session_state.metrics_manager.get_latest_job_metrics()
if latest_metrics:
    # NOTE: both values below are hard-coded placeholders.
    progress = 0.7158  # This should be calculated from actual data
    tokens_progress = "715,899,792,640/1T tokens"

    st.markdown("### Training Progress")
    st.progress(progress)

    # Narrow column for the percentage, wider one for the token counter.
    pct_col, tokens_col = st.columns([1, 2])
    with pct_col:
        st.metric("Progress", f"{progress*100:.2f}%")
    with tokens_col:
        st.metric("Tokens", tokens_progress)
162
+
163
def _sample_line_chart(title, x_title, y_title, y_values, trace_name, log_y=False):
    """Build one purple line chart over x = [1, 2, 3] on a transparent background."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=[1, 2, 3],
        y=y_values,
        mode='lines',
        line=dict(color='#9146FF', width=2),
        name=trace_name,
    ))
    layout = dict(
        title=title,
        xaxis_title=x_title,
        yaxis_title=y_title,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(color='white'),
    )
    if log_y:
        layout['yaxis_type'] = "log"
    fig.update_layout(**layout)
    return fig


# Metrics Grid: two columns with two charts each.
# NOTE: every series plotted here is a hard-coded sample list.
st.markdown("### Training Metrics")
left_col, right_col = st.columns(2)

with left_col:
    # Loss Plot
    st.plotly_chart(
        _sample_line_chart('Loss', 'Steps', 'Loss', [12, 3, 2], 'Loss', log_y=True),
        use_container_width=True,
    )
    # Tokens per Second Plot
    st.plotly_chart(
        _sample_line_chart('Tokens per Second', 'Time', 'Tokens/s',
                           [40000, 42000, 41000], 'Tokens/s'),
        use_container_width=True,
    )

with right_col:
    # Perplexity Plot
    st.plotly_chart(
        _sample_line_chart('Perplexity', 'Steps', 'Perplexity',
                           [200, 50, 20], 'Perplexity', log_y=True),
        use_container_width=True,
    )
    # Inner LR Plot
    st.plotly_chart(
        _sample_line_chart('Inner Learning Rate', 'Steps', 'Learning Rate',
                           [0.0001, 0.0001, 0.0001], 'Inner LR'),
        use_container_width=True,
    )
233
+
234
# Leaderboard and Map
st.markdown("### Network Overview")
col1, col2 = st.columns([3, 2])

with col1:
    # Leaderboard: one row per entry of the latest job, highest MH/s first.
    if latest_metrics:
        leaderboard_rows = []
        for m in latest_metrics:
            leaderboard_rows.append({
                'Miner UID': m['miner_uid'],
                # 'hashrate' is divided by 1e6 for display; absent -> 0.
                'MH/s': round(m['metrics'].get('hashrate', 0) / 1e6, 2),
                'Location': m.get('location', 'Unknown'),
                'Status': 'Active',
            })
        miner_df = pd.DataFrame(leaderboard_rows).sort_values('MH/s', ascending=False)

        st.dataframe(miner_df, use_container_width=True)

with col2:
    # Sample map data (three hard-coded points).
    map_data = pd.DataFrame({
        'lat': [32.7767, 40.7128, 51.5074],
        'lon': [-96.7970, -74.0060, -0.1278],
        'size': [10, 15, 20]
    })

    # Whole-world view, looking straight down.
    world_view = pdk.ViewState(latitude=20, longitude=0, zoom=1, pitch=0)
    miner_points = pdk.Layer(
        'ScatterplotLayer',
        data=map_data,
        get_position='[lon, lat]',
        get_color='[145, 70, 255, 160]',
        get_radius='size',
        pickable=True,
    )
    st.pydeck_chart(pdk.Deck(
        map_style='mapbox://styles/mapbox/dark-v10',
        initial_view_state=world_view,
        layers=[miner_points],
    ))

# Auto-refresh: wait five seconds, then re-run the script from the top.
time.sleep(5)
st.rerun()
config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ title: Alpha9 Training Dashboard
2
+ emoji: 🧠
3
+ colorFrom: purple
4
+ colorTo: indigo
5
+ sdk: streamlit
6
+ sdk_version: 1.39.0
7
+ app_file: app.py
8
+ pinned: false
9
+ license: apache-2.0
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "repl-nix-bittensordash"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "numpy>=2.1.2",
8
+ "pandas>=2.2.3",
9
+ "plotly>=5.24.1",
10
+ "scikit-learn>=1.5.2",
11
+ "streamlit>=1.39.0",
12
+ ]
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit>=1.28.0
2
+ gradio>=4.0.0
3
+ pandas
4
+ python-dotenv
5
+ huggingface_hub
6
+ plotly