Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Trackio API Client for Hugging Face Spaces | |
| Connects to the Trackio Space using the actual API endpoints | |
| """ | |
| import requests | |
| import json | |
| import time | |
| import logging | |
| from typing import Dict, Any, Optional | |
| from datetime import datetime | |
| # Setup logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
class TrackioAPIClient:
    """HTTP client for the Gradio API exposed by a Trackio Space.

    Every call is a two-step exchange: a POST to
    ``<space_url>/gradio_api/call/<endpoint>`` returns an event id, and a GET
    to ``.../<endpoint>/<event_id>`` returns the result, either as a JSON
    object with a ``data`` list or as a Server-Sent Events (SSE) stream.

    The public methods return ``{"success": True, "data": ...}`` on success
    and ``{"error": "...", ...}`` on failure; network and parsing problems
    are reported through the error dict instead of being raised.
    """

    def __init__(self, space_url: str):
        """Store the Space URL and derive the root of the call endpoints.

        Args:
            space_url: Base URL of the Space; trailing slashes are dropped.
        """
        self.space_url = space_url.rstrip('/')
        self.base_url = f"{self.space_url}/gradio_api/call"

    @staticmethod
    def _parse_sse_text(text: str) -> Dict[str, Any]:
        """Extract the result from an SSE body (``event: ...`` / ``data: ...``).

        The ``data`` line of the ``complete`` event is preferred so that
        frames emitted before it (heartbeats, intermediate updates) are not
        mistaken for the result. If no ``complete`` event is present, the
        first ``data`` line is used.

        Returns:
            A success dict holding the first element of the data array, or an
            error dict.
        """
        current_event = None
        first_data = None
        complete_data = None
        saw_error_event = False
        # splitlines() also copes with CRLF line endings.
        for line in text.splitlines():
            if line.startswith("event:"):
                current_event = line[len("event:"):].strip()
                saw_error_event = saw_error_event or current_event == "error"
            elif line.startswith("data:"):
                value = line[len("data:"):]
                # The single space after the colon is optional in SSE.
                if value.startswith(" "):
                    value = value[1:]
                if first_data is None:
                    first_data = value
                if current_event == "complete" and complete_data is None:
                    complete_data = value

        if complete_data is None and saw_error_event:
            return {"error": "Server reported an error event", "raw": text}

        data_line = complete_data if complete_data is not None else first_data
        if not data_line:
            return {"error": "No data line in SSE response", "raw": text}

        try:
            # The payload is JSON; literal_eval alone rejects true/false/null.
            values = json.loads(data_line)
        except ValueError as json_error:
            import ast
            try:
                # Fallback for Python-literal payloads such as ['message'].
                values = ast.literal_eval(data_line)
            except (ValueError, SyntaxError):
                return {"error": f"Failed to parse SSE data: {json_error}"}

        if isinstance(values, list) and values:
            return {"success": True, "data": values[0]}
        return {"error": "Invalid SSE data format", "raw": values}

    @staticmethod
    def _parse_result_text(response_text: str) -> Dict[str, Any]:
        """Turn the body of the result GET into a success or error dict.

        The body is tried as JSON first and as SSE text otherwise. This
        helper performs no I/O and no logging.
        """
        text = response_text.strip()
        try:
            # ValueError covers json.JSONDecodeError.
            payload = json.loads(text)
        except ValueError:
            return TrackioAPIClient._parse_sse_text(text)

        items = payload.get("data") if isinstance(payload, dict) else None
        if isinstance(items, list) and items:
            return {"success": True, "data": items[0]}
        return {"error": "No data in JSON response", "raw": payload}

    def _attempt_call(self, url: str, payload: Dict[str, Any], attempt_no: int) -> tuple:
        """Run one POST + GET exchange against *url*.

        Returns:
            ``(result, retryable)`` where *result* is a success or error dict
            and *retryable* tells the caller whether another attempt may help.
        """
        logger.debug(f"Attempt {attempt_no}: Making POST request to {url}")
        # The POST only registers the call and hands back an event id.
        response = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=30,
        )
        if response.status_code != 200:
            logger.error(f"POST request failed: {response.status_code} - {response.text}")
            return {"error": f"POST failed: {response.status_code}"}, True

        response_data = response.json()
        logger.debug(f"POST response: {response_data}")
        # "event_id" is the expected field; "hash" is accepted as a fallback.
        for key in ("event_id", "hash"):
            if key in response_data:
                event_id = response_data[key]
                break
        else:
            logger.error(f"No event_id or hash in response: {response_data}")
            return {"error": "No EVENT_ID in response"}, False

        get_url = f"{url}/{event_id}"
        logger.debug(f"Making GET request to: {get_url}")
        # Give the Space a moment to process before fetching the result.
        time.sleep(1)
        get_response = requests.get(get_url, timeout=30)
        if get_response.status_code != 200:
            logger.error(f"GET request failed: {get_response.status_code} - {get_response.text}")
            return {"error": f"GET failed: {get_response.status_code}"}, True
        if not get_response.content:
            logger.warning(f"Empty response from GET request (attempt {attempt_no})")
            return {"error": "Empty response from server"}, True

        logger.debug(f"Raw response: {get_response.text.strip()}")
        result = self._parse_result_text(get_response.text)
        if "success" in result:
            logger.debug(f"Parsed result: {result['data']}")
        else:
            logger.warning(f"{result['error']} (attempt {attempt_no})")
        return result, True

    def _make_api_call(self, endpoint: str, data: list, max_retries: int = 3) -> Dict[str, Any]:
        """Call one Gradio endpoint, retrying failed attempts with backoff.

        Args:
            endpoint: Endpoint name, used as the last segment of the call URL.
            data: Positional arguments, sent as the ``data`` list of the payload.
            max_retries: Maximum number of attempts.

        Returns:
            ``{"success": True, "data": <first result item>}`` on success,
            otherwise a dict with an ``"error"`` message (and sometimes the
            unparsed ``"raw"`` payload).
        """
        url = f"{self.base_url}/{endpoint}"
        payload = {"data": data}

        for attempt in range(max_retries):
            retryable = True
            try:
                result, retryable = self._attempt_call(url, payload, attempt + 1)
            except requests.exceptions.RequestException as e:
                logger.error(f"API call failed (attempt {attempt + 1}): {e}")
                result = {"error": f"Request failed: {e}"}
            except Exception as e:
                # Last-resort boundary: callers expect an error dict, not a raise.
                logger.error(f"Unexpected error (attempt {attempt + 1}): {e}")
                result = {"error": f"Unexpected error: {e}"}

            if "success" in result or not retryable or attempt >= max_retries - 1:
                return result
            # NOTE: a retry starts over with a fresh POST, so the server-side
            # action may be triggered more than once.
            time.sleep(2 ** attempt)  # Exponential backoff

        return {"error": f"Failed after {max_retries} attempts"}

    def _call_and_log(self, endpoint: str, data: list, success_msg: str,
                      failure_msg: str, preview: bool = False) -> Dict[str, Any]:
        """Invoke *endpoint* and log the outcome.

        Args:
            endpoint: Endpoint name passed to ``_make_api_call``.
            data: Positional arguments for the endpoint.
            success_msg: Log prefix used when the call succeeds.
            failure_msg: Log prefix used when the call fails.
            preview: Log only the first 100 characters of the result.

        Returns:
            The dict returned by ``_make_api_call``, unchanged.
        """
        result = self._make_api_call(endpoint, data)
        if "success" in result:
            shown = result['data']
            if preview:
                # str() keeps the log line from raising on non-string data.
                shown = f"{str(shown)[:100]}..."
            logger.info(f"{success_msg}: {shown}")
        else:
            logger.error(f"{failure_msg}: {result}")
        return result

    def create_experiment(self, name: str, description: str = "") -> Dict[str, Any]:
        """Create a new experiment via ``create_experiment_interface``."""
        logger.info(f"Creating experiment: {name}")
        return self._call_and_log(
            "create_experiment_interface", [name, description],
            "Experiment created successfully", "Failed to create experiment",
        )

    def log_metrics(self, experiment_id: str, metrics: Dict[str, Any], step: Optional[int] = None) -> Dict[str, Any]:
        """Log metrics for an experiment via ``log_metrics_interface``.

        *metrics* is sent as a JSON string; *step* is sent as a string, or as
        an empty string when it is None.
        """
        metrics_json = json.dumps(metrics)
        step_str = str(step) if step is not None else ""
        logger.info(f"Logging metrics for experiment {experiment_id} at step {step}")
        return self._call_and_log(
            "log_metrics_interface", [experiment_id, metrics_json, step_str],
            "Metrics logged successfully", "Failed to log metrics",
        )

    def log_parameters(self, experiment_id: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Log parameters (sent as a JSON string) via ``log_parameters_interface``."""
        parameters_json = json.dumps(parameters)
        logger.info(f"Logging parameters for experiment {experiment_id}")
        return self._call_and_log(
            "log_parameters_interface", [experiment_id, parameters_json],
            "Parameters logged successfully", "Failed to log parameters",
        )

    def get_experiment_details(self, experiment_id: str) -> Dict[str, Any]:
        """Fetch experiment details via ``get_experiment_details``."""
        logger.info(f"Getting details for experiment {experiment_id}")
        return self._call_and_log(
            "get_experiment_details", [experiment_id],
            "Experiment details retrieved", "Failed to get experiment details",
            preview=True,
        )

    def list_experiments(self) -> Dict[str, Any]:
        """List all experiments via ``list_experiments_interface``."""
        logger.info("Listing all experiments")
        return self._call_and_log(
            "list_experiments_interface", [],
            "Experiments listed", "Failed to list experiments",
            preview=True,
        )

    def update_experiment_status(self, experiment_id: str, status: str) -> Dict[str, Any]:
        """Update an experiment's status via ``update_experiment_status_interface``."""
        logger.info(f"Updating experiment {experiment_id} status to {status}")
        return self._call_and_log(
            "update_experiment_status_interface", [experiment_id, status],
            "Status updated successfully", "Failed to update status",
        )

    def simulate_training_data(self, experiment_id: str) -> Dict[str, Any]:
        """Ask the Space to simulate training data via ``simulate_training_data``."""
        logger.info(f"Simulating training data for experiment {experiment_id}")
        return self._call_and_log(
            "simulate_training_data", [experiment_id],
            "Training data simulated", "Failed to simulate training data",
        )

    def get_training_metrics(self, experiment_id: str) -> Dict[str, Any]:
        """Fetch training metrics via ``get_training_metrics``."""
        logger.info(f"Getting training metrics for experiment {experiment_id}")
        return self._call_and_log(
            "get_training_metrics", [experiment_id],
            "Training metrics retrieved", "Failed to get training metrics",
            preview=True,
        )

    def get_experiment_metrics_history(self, experiment_id: str) -> Dict[str, Any]:
        """Fetch the complete metrics history via ``get_metrics_history``."""
        logger.info(f"Getting metrics history for experiment {experiment_id}")
        return self._call_and_log(
            "get_metrics_history", [experiment_id],
            "Metrics history retrieved", "Failed to get metrics history",
            preview=True,
        )
def test_simple_connection(space_url: str = "https://tonic-test-trackio-test.hf.space") -> bool:
    """Check that the Space answers a plain HTTP GET.

    Args:
        space_url: URL to probe; defaults to the Space this script targets.

    Returns:
        True when the response status is 200; False for any other status or
        when the request raises.
    """
    print("🔍 Testing Basic Space Connectivity")
    print("=" * 50)

    try:
        response = requests.get(space_url, timeout=10)
    except Exception as e:
        # Best-effort probe: any failure simply means "not reachable".
        print(f"❌ Cannot connect to Space: {e}")
        return False

    if response.status_code == 200:
        print("✅ Space is accessible")
        return True

    print(f"❌ Space returned status: {response.status_code}")
    return False
def _print_response_preview(data: Any, limit: int = 200) -> None:
    """Print the first *limit* characters of *data*, surviving consoles that cannot encode them."""
    line = f" Response: {str(data)[:limit]}..."
    try:
        print(line)
    except UnicodeEncodeError:
        import sys
        # Re-encode for the console's own codec, replacing what it cannot show.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(line.encode(encoding, errors="replace").decode(encoding))


def test_api_connection():
    """Exercise the Trackio API end to end and print the outcome of each step.

    Steps: basic connectivity, experiment creation, then (only if an
    experiment id can be extracted from the creation response) parameter
    logging, metrics logging, experiment listing and experiment details.
    Returns early, without the closing banner, when the Space is unreachable.
    """
    import re

    print("🔍 Testing Trackio API Connection")
    print("=" * 50)

    # First test basic connectivity
    if not test_simple_connection():
        return

    client = TrackioAPIClient("https://tonic-test-trackio-test.hf.space")

    # Test 1: Create experiment
    print("\n1. Testing experiment creation...")
    create_result = client.create_experiment(
        "test_experiment_api",
        "Test experiment created via API"
    )

    experiment_id = None
    if "success" not in create_result:
        print(f"❌ Failed to create experiment: {create_result}")
    else:
        print("✅ Experiment created successfully")
        # str() guards against a non-string payload from the Space.
        response_text = str(create_result['data'])
        match = re.search(r'exp_\d{8}_\d{6}', response_text)
        if match:
            experiment_id = match.group()
            print(f" Experiment ID: {experiment_id}")
        elif "exp_" in response_text:
            print("❌ Could not extract experiment ID from response")
        else:
            print("❌ No experiment ID found in response")

    if experiment_id is not None:
        # Test 2: Log parameters
        print("\n2. Testing parameter logging...")
        parameters = {
            "model_name": "HuggingFaceTB/SmolLM3-3B",
            "batch_size": 8,
            "learning_rate": 3.5e-6,
            "max_iters": 18000
        }
        param_result = client.log_parameters(experiment_id, parameters)
        if "success" in param_result:
            print("✅ Parameters logged successfully")
        else:
            print(f"❌ Failed to log parameters: {param_result}")

        # Test 3: Log metrics
        print("\n3. Testing metrics logging...")
        metrics = {
            "loss": 0.5234,
            "accuracy": 0.8567,
            "learning_rate": 3.5e-6,
            "gpu_memory_gb": 22.5
        }
        metrics_result = client.log_metrics(experiment_id, metrics, 100)
        if "success" in metrics_result:
            print("✅ Metrics logged successfully")
        else:
            print(f"❌ Failed to log metrics: {metrics_result}")

        # Test 4: List experiments
        print("\n4. Testing experiment listing...")
        list_result = client.list_experiments()
        if "success" in list_result:
            print("✅ Experiments listed successfully")
            _print_response_preview(list_result['data'])
        else:
            print(f"❌ Failed to list experiments: {list_result}")

        # Test 5: Get experiment details
        print("\n5. Testing experiment details...")
        details_result = client.get_experiment_details(experiment_id)
        if "success" in details_result:
            print("✅ Experiment details retrieved successfully")
            _print_response_preview(details_result['data'])
        else:
            print(f"❌ Failed to get experiment details: {details_result}")

    print("\n" + "=" * 50)
    print("🎯 API Test Complete")
    print("=" * 50)
def create_real_experiment(space_url: str = "https://tonic-test-trackio-test.hf.space") -> Optional[str]:
    """Create the training experiment on the Space and log its initial parameters.

    Args:
        space_url: Base URL of the Trackio Space to create the experiment on.

    Returns:
        The experiment id (``exp_<8 digits>_<6 digits>``) found in the
        creation response, or None when creation fails or no id can be
        extracted. A failure to log the parameters is printed but does not
        change the return value.
    """
    import re

    print("🚀 Creating Real Experiment for Training")
    print("=" * 50)

    client = TrackioAPIClient(space_url)

    create_result = client.create_experiment(
        "petit-elle-l-aime-3-balanced",
        "SmolLM3 fine-tuning on OpenHermes-FR dataset with balanced A100 configuration"
    )
    if "success" not in create_result:
        print(f"❌ Failed to create experiment: {create_result}")
        return None

    print("✅ Experiment created successfully")
    print(f"Response: {create_result['data']}")

    # str() guards against a non-string payload from the Space.
    match = re.search(r'exp_\d{8}_\d{6}', str(create_result['data']))
    if not match:
        print("❌ Could not extract experiment ID")
        return None

    experiment_id = match.group()
    print(f"📋 Experiment ID: {experiment_id}")

    # Log initial parameters
    parameters = {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "legmlai/openhermes-fr",
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "effective_batch_size": 128,
        "learning_rate": 3.5e-6,
        "max_iters": 18000,
        "max_seq_length": 12288,
        "mixed_precision": "bf16",
        "use_flash_attention": True,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "warmup_steps": 1200,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "no_think_system_message": True
    }
    param_result = client.log_parameters(experiment_id, parameters)
    if "success" in param_result:
        print("✅ Initial parameters logged")
    else:
        print(f"❌ Failed to log parameters: {param_result}")

    return experiment_id
if __name__ == "__main__":
    # Run the API smoke test first, then create the experiment used for training.
    test_api_connection()

    print("\n" + "=" * 60)
    print("🎯 CREATING REAL EXPERIMENT")
    print("=" * 60)

    experiment_id = create_real_experiment()

    if experiment_id:
        print("\n✅ SUCCESS! Your experiment is ready:")
        print(f"   Experiment ID: {experiment_id}")
        print("   Trackio Space: https://tonic-test-trackio-test.hf.space")
        print("   View experiments in the 'View Experiments' tab")
        print("\n📋 Next steps:")
        print("1. Use this experiment ID in your training script")
        print("2. Monitor progress in the Trackio Space")
        print("3. Log metrics as training progresses")
    else:
        print("\n❌ Failed to create experiment")