import json
import os
import random
import time
import traceback
from contextlib import contextmanager
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import torch
import transformers
from datasets import Dataset
from huggingface_hub import HfApi
from transformers import (
    DataCollatorForLanguageModeling,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
)


# Advanced Cyberpunk Styling
def setup_advanced_cyberpunk_style() -> None:
    """Push the app's custom styling markup into the Streamlit page.

    The payload is rendered through ``st.markdown`` with
    ``unsafe_allow_html=True`` so raw markup is not escaped.

    NOTE(review): the payload visible here is a single blank; presumably the
    original style block was lost before this copy was made -- confirm.
    """
    st.markdown(""" """, unsafe_allow_html=True)


# Fixed prepare_dataset function
def prepare_dataset(data, tokenizer, block_size: int = 128):
    """Tokenize raw texts into a torch-formatted dataset with labels.

    Args:
        data: Texts to tokenize; stored as the ``text`` column of a
            ``datasets.Dataset`` before tokenization.
        tokenizer: Callable tokenizer, invoked on each batch of texts with
            ``truncation=True``, ``max_length=block_size`` and
            ``padding='max_length'``.
            NOTE(review): padding presumably needs a pad token configured on
            the tokenizer by the caller -- confirm.
        block_size: Length every example is truncated/padded to.

    Returns:
        The tokenized dataset, formatted as torch tensors and exposing the
        ``input_ids``, ``attention_mask`` and ``labels`` columns.
        NOTE(review): ``error_handling`` is defined outside this view; if it
        suppresses exceptions, this function falls through and returns
        ``None`` on failure -- confirm.

    NOTE(review): ``labels`` mirror ``input_ids`` verbatim, padded positions
    included. Whether padding is excluded from the loss depends on the
    collator/training loop used downstream -- verify there.
    """

    def tokenize_function(examples):
        encoded = tokenizer(
            examples['text'],
            truncation=True,
            max_length=block_size,
            padding='max_length',
        )
        # Attach labels in the same pass instead of re-mapping the whole
        # dataset a second time just to duplicate one column.
        encoded['labels'] = list(encoded['input_ids'])
        return encoded

    with error_handling("dataset preparation"):
        raw_dataset = Dataset.from_dict({'text': data})
        tokenized_dataset = raw_dataset.map(
            tokenize_function,
            batched=True,
            remove_columns=['text'],
        )
        tokenized_dataset.set_format(
            type='torch',
            columns=['input_ids', 'attention_mask', 'labels'],
        )
        return tokenized_dataset


# Advanced Metrics Visualization
def create_training_metrics_plot(fitness_history) -> go.Figure:
    """Build the themed line chart of recorded loss values.

    Args:
        fitness_history: Values plotted on the y axis as the ``Loss`` trace,
            in the order given (the x axis is labelled ``Generation``).

    Returns:
        A Plotly figure with one lines+markers trace and the layout
        (title, fonts, colors, axes, hover mode) configured below.
    """
    accent = '#00ff9d'
    # Both axes share the same grid/zero-line look; only the title differs.
    axis_style = dict(
        gridcolor='rgba(0,255,157,0.1)',
        zerolinecolor=accent,
    )

    loss_trace = go.Scatter(
        y=fitness_history,
        mode='lines+markers',
        name='Loss',
        line=dict(color=accent, width=2),
        marker=dict(size=8, symbol='diamond'),
    )

    fig = go.Figure()
    fig.add_trace(loss_trace)
    fig.update_layout(
        title={
            'text': 'Training Progress',
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'family': 'Orbitron', 'size': 24, 'color': accent},
        },
        paper_bgcolor='rgba(0,0,0,0.5)',
        plot_bgcolor='rgba(0,0,0,0.3)',
        font=dict(family='Share Tech Mono', color=accent),
        xaxis=dict(title='Generation', **axis_style),
        yaxis=dict(title='Loss', **axis_style),
        hovermode='x unified',
    )
    return fig


# Advanced
Training Dashboard class TrainingDashboard: def __init__(self): self.metrics = { 'current_loss': 0, 'best_loss': float('inf'), 'generation': 0, 'individual': 0, 'start_time': time.time(), 'training_speed': 0 } self.history = [] def update(self, loss, generation, individual): self.metrics['current_loss'] = loss self.metrics['generation'] = generation self.metrics['individual'] = individual if loss < self.metrics['best_loss']: self.metrics['best_loss'] = loss elapsed_time = time.time() - self.metrics['start_time'] self.metrics['training_speed'] = (generation * individual) / elapsed_time self.history.append({ 'loss': loss, 'timestamp': datetime.now().strftime('%H:%M:%S') }) def display(self): col1, col2, col3 = st.columns(3) with col1: st.markdown("""
Generation: {}/{}
Individual: {}/{}
Current Loss: {:.4f}
Best Loss: {:.4f}
Speed: {:.2f} iter/s
Runtime: {:.2f}m
GPU: {}
Memory Usage: {:.2f}GB