import streamlit as st
import numpy as np
import random
import torch
import transformers
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset
from huggingface_hub import HfApi
import os
import traceback
from contextlib import contextmanager
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import time
import json
import pandas as pd


# Shared error handling for the pipeline steps. prepare_dataset() below expects a
# context manager with this name; this minimal implementation surfaces the
# traceback in the Streamlit UI and re-raises so the caller stops cleanly.
@contextmanager
def error_handling(step_name):
    try:
        yield
    except Exception as exc:
        st.error(f"Error during {step_name}: {exc}")
        st.code(traceback.format_exc())
        raise


# Advanced Cyberpunk Styling
def setup_advanced_cyberpunk_style():
    # Custom CSS theme (Orbitron / Share Tech Mono fonts, neon-green palette) goes here.
    st.markdown("""
    """, unsafe_allow_html=True)


# Fixed prepare_dataset function
def prepare_dataset(data, tokenizer, block_size=128):
    with error_handling("dataset preparation"):
        def tokenize_function(examples):
            return tokenizer(examples['text'], truncation=True, max_length=block_size, padding='max_length')

        raw_dataset = Dataset.from_dict({'text': data})
        tokenized_dataset = raw_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
        # Causal language modeling uses the input ids themselves as labels
        tokenized_dataset = tokenized_dataset.map(
            lambda examples: {'labels': examples['input_ids']},
            batched=True
        )
        tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
        return tokenized_dataset


# Advanced Metrics Visualization
def create_training_metrics_plot(fitness_history):
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        y=fitness_history,
        mode='lines+markers',
        name='Loss',
        line=dict(color='#00ff9d', width=2),
        marker=dict(size=8, symbol='diamond'),
    ))
    fig.update_layout(
        title={
            'text': 'Training Progress',
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'family': 'Orbitron', 'size': 24, 'color': '#00ff9d'}
        },
        paper_bgcolor='rgba(0,0,0,0.5)',
        plot_bgcolor='rgba(0,0,0,0.3)',
        font=dict(family='Share Tech Mono', color='#00ff9d'),
        xaxis=dict(
            title='Generation',
            gridcolor='rgba(0,255,157,0.1)',
            zerolinecolor='#00ff9d'
        ),
        yaxis=dict(
            title='Loss',
            gridcolor='rgba(0,255,157,0.1)',
            zerolinecolor='#00ff9d'
        ),
        hovermode='x unified'
    )
    return fig


# Advanced Training Dashboard
class TrainingDashboard:
    def __init__(self):
        self.metrics = {
            'current_loss': 0,
            'best_loss': float('inf'),
            'generation': 0,
            'total_generations': 0,   # set from the sidebar configuration in main()
            'individual': 0,
            'population_size': 0,     # set from the sidebar configuration in main()
            'start_time': time.time(),
            'training_speed': 0
        }
        self.history = []

    def update(self, loss, generation, individual):
        self.metrics['current_loss'] = loss
        self.metrics['generation'] = generation
        self.metrics['individual'] = individual
        if loss < self.metrics['best_loss']:
            self.metrics['best_loss'] = loss
        elapsed_time = time.time() - self.metrics['start_time']
        self.metrics['training_speed'] = (generation * individual) / elapsed_time
        self.history.append({
            'loss': loss,
            'timestamp': datetime.now().strftime('%H:%M:%S')
        })

    def display(self):
        col1, col2, col3 = st.columns(3)

        with col1:
            st.markdown("""
                <div>
                    <h3>Current Status</h3>
                    <p>Generation: {}/{}</p>
                    <p>Individual: {}/{}</p>
                </div>
            """.format(
                self.metrics['generation'], self.metrics['total_generations'],
                self.metrics['individual'], self.metrics['population_size']
            ), unsafe_allow_html=True)

        with col2:
            st.markdown("""
                <div>
                    <h3>Performance</h3>
                    <p>Current Loss: {:.4f}</p>
                    <p>Best Loss: {:.4f}</p>
                </div>
            """.format(
                self.metrics['current_loss'], self.metrics['best_loss']
            ), unsafe_allow_html=True)

        with col3:
            st.markdown("""
                <div>
                    <h3>Training Metrics</h3>
                    <p>Speed: {:.2f} iter/s</p>
                    <p>Runtime: {:.2f}m</p>
                </div>
            """.format(
                self.metrics['training_speed'],
                (time.time() - self.metrics['start_time']) / 60
            ), unsafe_allow_html=True)

def main():
    setup_advanced_cyberpunk_style()
    st.markdown('<h1>Neural Evolution GPT-2 Training Hub</h1>', unsafe_allow_html=True)

    # Initialize dashboard
    dashboard = TrainingDashboard()

    # Advanced Sidebar
    with st.sidebar:
        st.markdown("""
            <h2>Control Panel</h2>
        """, unsafe_allow_html=True)

        # Configuration Tabs
        tab1, tab2, tab3 = st.tabs(["🔧 Setup", "⚙️ Parameters", "📊 Monitoring"])

        with tab1:
            hf_token = st.text_input("🔑 HuggingFace Token", type="password")
            repo_name = st.text_input("📁 Repository Name", "my-gpt2-model")
            data_source = st.selectbox('📊 Data Source', ('DEMO', 'Upload Text File'))

        with tab2:
            population_size = st.slider("Population Size", 4, 20, 6)
            num_generations = st.slider("Generations", 1, 10, 3)
            num_parents = st.slider("Parents", 2, population_size, 2)
            mutation_rate = st.slider("Mutation Rate", 0.0, 1.0, 0.1)

            # Advanced Parameters
            with st.expander("🔬 Advanced Settings"):
                learning_rate_min = st.number_input("Min Learning Rate", 1e-6, 1e-4, 1e-5)
                learning_rate_max = st.number_input("Max Learning Rate", 1e-5, 1e-3, 5e-5)
                batch_size_options = st.multiselect("Batch Sizes", [2, 4, 8, 16], default=[2, 4, 8])

        with tab3:
            st.markdown("""
                <div>
                    <h3>System Status</h3>
                    <p>GPU: {}</p>
                    <p>Memory Usage: {:.2f}GB</p>
                </div>
            """.format(
                'CUDA' if torch.cuda.is_available() else 'CPU',
                torch.cuda.memory_allocated() / 1e9 if torch.cuda.is_available() else 0
            ), unsafe_allow_html=True)

    # Let the dashboard report progress against the configured run size
    dashboard.metrics['total_generations'] = num_generations
    dashboard.metrics['population_size'] = population_size

    # [Rest of your existing main() function code here, integrated with the dashboard]
    # Make sure to update the dashboard metrics during training

    # Example of updating dashboard during training:
    for generation in range(num_generations):
        for idx, individual in enumerate(population):
            # Your existing training code
            fitness = fitness_function(individual, train_dataset, model_clone, tokenizer)
            dashboard.update(fitness, generation + 1, idx + 1)
            dashboard.display()

            # Update progress
            progress = (generation * len(population) + idx + 1) / (num_generations * len(population))
            st.markdown(f"""
                <div>
                    <div style="width: {progress * 100:.1f}%; background: #00ff9d; height: 6px;"></div>
                    <p>Training progress: {progress:.1%}</p>
                </div>
            """, unsafe_allow_html=True)
""", unsafe_allow_html=True) if __name__ == "__main__": main()