jbilcke-hf HF Staff committed on
Commit 7dadc22 · 1 Parent(s): 5707a78
.claude/settings.local.json CHANGED
@@ -1,7 +1,10 @@
1
  {
2
  "permissions": {
3
  "allow": [
4
- "Bash(flutter build:*)"
 
 
 
5
  ],
6
  "deny": []
7
  },
 
1
  {
2
  "permissions": {
3
  "allow": [
4
+ "Bash(flutter build:*)",
5
+ "Bash(mv:*)",
6
+ "Bash(ls:*)",
7
+ "Bash(python:*)"
8
  ],
9
  "deny": []
10
  },
api.py CHANGED
@@ -8,17 +8,16 @@ import uuid
8
  from aiohttp import web, WSMsgType
9
  from typing import Dict, Any
10
 
11
- from api_core import VideoGenerationAPI
12
- from api_session import SessionManager
13
- from api_metrics import MetricsTracker
14
- from api_config import *
15
 
16
- # Configure logging
17
- logging.basicConfig(
18
- level=logging.INFO,
19
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
20
- )
21
- logger = logging.getLogger(__name__)
22
 
23
  # Create global session and metrics managers
24
  session_manager = SessionManager()
@@ -175,6 +174,8 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
175
  if action in ['join_chat', 'leave_chat', 'chat_message']:
176
  await user_session.chat_queue.put(data)
177
  elif action in ['generate_video']:
 
 
178
  await user_session.video_queue.put(data)
179
  elif action == 'search':
180
  await user_session.search_queue.put(data)
 
8
  from aiohttp import web, WSMsgType
9
  from typing import Dict, Any
10
 
11
+ from server.api_core import VideoGenerationAPI
12
+ from server.api_session import SessionManager
13
+ from server.api_metrics import MetricsTracker
14
+ from server.api_config import *
15
 
16
+ # Set up colored logging
17
+ from server.logging_utils import setup_colored_logging, get_logger
18
+
19
+ setup_colored_logging()
20
+ logger = get_logger(__name__)
 
21
 
22
  # Create global session and metrics managers
23
  session_manager = SessionManager()
 
174
  if action in ['join_chat', 'leave_chat', 'chat_message']:
175
  await user_session.chat_queue.put(data)
176
  elif action in ['generate_video']:
177
+ request_id = data.get('requestId', 'unknown')
178
+ #logger.info(f"[{request_id}] Received generate_video request from user {user_id}, adding to video queue")
179
  await user_session.video_queue.put(data)
180
  elif action == 'search':
181
  await user_session.search_queue.put(data)
assets/config/default.yaml CHANGED
@@ -21,9 +21,10 @@ advertising:
21
  link: https://huggingface.co/docs/smolagents/index
22
 
23
  simulation:
24
- # how often the description should evolve (in seconds)
25
- # setting to 0 disables description evolution
26
- sim_loop_frequency_in_sec: 8
 
27
 
28
  video:
29
  # default negative prompt to filter harmful content
 
21
  link: https://huggingface.co/docs/smolagents/index
22
 
23
  simulation:
24
+ # delay to wait after each simulation loop (in seconds)
25
+ sim_loop_delay_in_sec: 8
26
+
27
+ enable_sim_loop: false
28
 
29
  video:
30
  # default negative prompt to filter harmful content
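On the Flutter side, these two keys are read through the Configuration getters touched later in this commit (lib/config/config.dart). A minimal usage sketch, assuming the existing Configuration.instance singleton, Flutter's debugPrint, and an import path that may differ in the real project:

    // Sketch only: reads the renamed simulation keys via the getters from this commit.
    import 'package:flutter/foundation.dart';
    import 'config/config.dart'; // assumed path

    void logSimulationConfig() {
      final config = Configuration.instance;
      if (!config.enableSimLoop) {
        debugPrint('SIMULATION: disabled by config');
        return;
      }
      debugPrint('SIMULATION: loop delay is ${config.simLoopDelayInSec}s');
    }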
assets/config/tikslop.yaml CHANGED
@@ -25,11 +25,10 @@ advertising:
25
 
26
  simulation:
27
  # whether to enable simulation loop to evolve descriptions over time
28
- enable_sim_loop: false
29
 
30
- # how often the description should evolve (in seconds)
31
- # setting to 0 disables description evolution
32
- sim_loop_frequency_in_sec: 8
33
 
34
  # it's OK to use high values here,
35
  # because some of those values are limited by the backend config,
 
25
 
26
  simulation:
27
  # whether to enable simulation loop to evolve descriptions over time
28
+ enable_sim_loop: true
29
 
30
+ # delay to wait after each simulation loop (in seconds)
31
+ sim_loop_delay_in_sec: 8
 
32
 
33
  # it's OK to use high values here,
34
  # because some of those values are limited by the backend config,
build/web/flutter_bootstrap.js CHANGED
@@ -38,6 +38,6 @@ _flutter.buildConfig = {"engineRevision":"1c9c20e7c3dd48c66f400a24d48ea806b4ab31
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
- serviceWorkerVersion: "401217633"
42
  }
43
  });
 
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
+ serviceWorkerVersion: "2752272678"
42
  }
43
  });
build/web/flutter_service_worker.js CHANGED
@@ -3,12 +3,12 @@ const MANIFEST = 'flutter-app-manifest';
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
- const RESOURCES = {"flutter_bootstrap.js": "4704f10d11e3b5570203ced46f420e01",
7
  "version.json": "68350cac7987de2728345c72918dd067",
8
  "tikslop.png": "570e1db759046e2d224fef729983634e",
9
  "index.html": "3a7029b3672560e7938aab6fa4d30a46",
10
  "/": "3a7029b3672560e7938aab6fa4d30a46",
11
- "main.dart.js": "3e9b078cf004be6fc60825295d810cf0",
12
  "tikslop.svg": "26140ba0d153b213b122bc6ebcc17f6c",
13
  "flutter.js": "888483df48293866f9f41d3d9274a779",
14
  "favicon.png": "c8a183c516004e648a7bac7497c89b97",
 
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
+ const RESOURCES = {"flutter_bootstrap.js": "b92f2a2de5eedfe76230ecb6dfa2b1fe",
7
  "version.json": "68350cac7987de2728345c72918dd067",
8
  "tikslop.png": "570e1db759046e2d224fef729983634e",
9
  "index.html": "3a7029b3672560e7938aab6fa4d30a46",
10
  "/": "3a7029b3672560e7938aab6fa4d30a46",
11
+ "main.dart.js": "6491f49e4f56ea8d316d8372e4e08020",
12
  "tikslop.svg": "26140ba0d153b213b122bc6ebcc17f6c",
13
  "flutter.js": "888483df48293866f9f41d3d9274a779",
14
  "favicon.png": "c8a183c516004e648a7bac7497c89b97",
build/web/index.html CHANGED
@@ -156,7 +156,7 @@
156
  </script>
157
 
158
  <!-- Add version parameter for cache busting -->
159
- <script src="flutter_bootstrap.js?v=1753366329" async></script>
160
 
161
  <!-- Add cache busting script -->
162
  <script>
 
156
  </script>
157
 
158
  <!-- Add version parameter for cache busting -->
159
+ <script src="flutter_bootstrap.js?v=1753441877" async></script>
160
 
161
  <!-- Add cache busting script -->
162
  <script>
build/web/main.dart.js CHANGED
The diff for this file is too large to render. See raw diff
 
lib/config/config.dart CHANGED
@@ -132,8 +132,8 @@ class Configuration {
132
  bool get enableSimLoop =>
133
  _config['simulation']?['enable_sim_loop'] ?? true;
134
 
135
- int get simLoopFrequencyInSec =>
136
- _config['simulation']?['sim_loop_frequency_in_sec'] ?? 0;
137
 
138
  // Computed properties
139
 
 
132
  bool get enableSimLoop =>
133
  _config['simulation']?['enable_sim_loop'] ?? true;
134
 
135
+ int get simLoopDelayInSec =>
136
+ _config['simulation']?['sim_loop_delay_in_sec'] ?? 0;
137
 
138
  // Computed properties
139
 
lib/screens/settings_screen.dart CHANGED
@@ -17,12 +17,14 @@ class _SettingsScreenState extends State<SettingsScreen> {
17
  final _promptController = TextEditingController();
18
  final _negativePromptController = TextEditingController();
19
  final _hfApiKeyController = TextEditingController();
 
20
  final _llmApiKeyController = TextEditingController();
21
  final _modelNameController = TextEditingController();
22
  final _settingsService = SettingsService();
23
  final _availabilityService = ModelAvailabilityService();
24
  bool _showSceneDebugInfo = false;
25
  bool _enableSimulation = true;
 
26
  String _selectedLlmProvider = 'built-in';
27
  String _selectedLlmModel = 'meta-llama/Llama-3.2-3B-Instruct';
28
  LLMProvider? _currentProvider;
@@ -39,9 +41,11 @@ class _SettingsScreenState extends State<SettingsScreen> {
39
  _promptController.text = _settingsService.videoPromptPrefix;
40
  _negativePromptController.text = _settingsService.negativeVideoPrompt;
41
  _hfApiKeyController.text = _settingsService.huggingfaceApiKey;
 
42
  _llmApiKeyController.text = _settingsService.llmApiKey;
43
  _showSceneDebugInfo = _settingsService.showSceneDebugInfo;
44
  _enableSimulation = _settingsService.enableSimulation;
 
45
 
46
  // Auto-select built-in model if no HF API key
47
  if (_settingsService.huggingfaceApiKey.isEmpty) {
@@ -73,6 +77,7 @@ class _SettingsScreenState extends State<SettingsScreen> {
73
  _promptController.dispose();
74
  _negativePromptController.dispose();
75
  _hfApiKeyController.dispose();
 
76
  _llmApiKeyController.dispose();
77
  _modelNameController.dispose();
78
  super.dispose();
@@ -285,6 +290,20 @@ class _SettingsScreenState extends State<SettingsScreen> {
285
  },
286
  ),
287
  const SizedBox(height: 16),
288
  // Model selection dropdown
289
  DropdownButtonFormField<String>(
290
  decoration: InputDecoration(
@@ -713,8 +732,6 @@ class _SettingsScreenState extends State<SettingsScreen> {
713
  _settingsService.setShowSceneDebugInfo(value);
714
  },
715
  ),
716
- /*
717
- let's disable this for now, I still need to work on this
718
  SwitchListTile(
719
  title: const Text('Enable world simulator engine'),
720
  subtitle: const Text('Allow video descriptions to evolve over time using a LLM (this consumes tokens, your Hugging Face account will be billed)'),
@@ -726,7 +743,48 @@ class _SettingsScreenState extends State<SettingsScreen> {
726
  _settingsService.setEnableSimulation(value);
727
  },
728
  ),
729
- */
730
  const SizedBox(height: 16),
731
  // Clear device connections button
732
  ListTile(
 
17
  final _promptController = TextEditingController();
18
  final _negativePromptController = TextEditingController();
19
  final _hfApiKeyController = TextEditingController();
20
+ final _gameMasterPromptController = TextEditingController();
21
  final _llmApiKeyController = TextEditingController();
22
  final _modelNameController = TextEditingController();
23
  final _settingsService = SettingsService();
24
  final _availabilityService = ModelAvailabilityService();
25
  bool _showSceneDebugInfo = false;
26
  bool _enableSimulation = true;
27
+ int _simLoopDelayInSec = 5;
28
  String _selectedLlmProvider = 'built-in';
29
  String _selectedLlmModel = 'meta-llama/Llama-3.2-3B-Instruct';
30
  LLMProvider? _currentProvider;
 
41
  _promptController.text = _settingsService.videoPromptPrefix;
42
  _negativePromptController.text = _settingsService.negativeVideoPrompt;
43
  _hfApiKeyController.text = _settingsService.huggingfaceApiKey;
44
+ _gameMasterPromptController.text = _settingsService.gameMasterPrompt;
45
  _llmApiKeyController.text = _settingsService.llmApiKey;
46
  _showSceneDebugInfo = _settingsService.showSceneDebugInfo;
47
  _enableSimulation = _settingsService.enableSimulation;
48
+ _simLoopDelayInSec = _settingsService.simLoopDelayInSec;
49
 
50
  // Auto-select built-in model if no HF API key
51
  if (_settingsService.huggingfaceApiKey.isEmpty) {
 
77
  _promptController.dispose();
78
  _negativePromptController.dispose();
79
  _hfApiKeyController.dispose();
80
+ _gameMasterPromptController.dispose();
81
  _llmApiKeyController.dispose();
82
  _modelNameController.dispose();
83
  super.dispose();
 
290
  },
291
  ),
292
  const SizedBox(height: 16),
293
+ TextField(
294
+ controller: _gameMasterPromptController,
295
+ decoration: const InputDecoration(
296
+ labelText: 'Game Master Prompt',
297
+ hintText: 'Keep things fun and kid-friendly.',
298
+ helperText: 'Additional instructions applied to all LLM requests (search, captions, simulations)',
299
+ helperMaxLines: 2,
300
+ ),
301
+ maxLines: 3,
302
+ onChanged: (value) {
303
+ _settingsService.setGameMasterPrompt(value);
304
+ },
305
+ ),
306
+ const SizedBox(height: 16),
307
  // Model selection dropdown
308
  DropdownButtonFormField<String>(
309
  decoration: InputDecoration(
 
732
  _settingsService.setShowSceneDebugInfo(value);
733
  },
734
  ),
 
 
735
  SwitchListTile(
736
  title: const Text('Enable world simulator engine'),
737
  subtitle: const Text('Allow video descriptions to evolve over time using a LLM (this consumes tokens, your Hugging Face account will be billed)'),
 
743
  _settingsService.setEnableSimulation(value);
744
  },
745
  ),
746
+ // Only show simulation delay setting if user has HF API key
747
+ if (_hfApiKeyController.text.isNotEmpty) ...[
748
+ const SizedBox(height: 8),
749
+ ListTile(
750
+ title: const Text('Simulation Loop Delay'),
751
+ subtitle: Text('Delay between simulation iterations: ${_simLoopDelayInSec}s (Warning: Short delays consume more LLM tokens)'),
752
+ trailing: SizedBox(
753
+ width: 200,
754
+ child: Row(
755
+ mainAxisAlignment: MainAxisAlignment.end,
756
+ children: [
757
+ IconButton(
758
+ onPressed: _simLoopDelayInSec > 0 ? () {
759
+ setState(() {
760
+ _simLoopDelayInSec = (_simLoopDelayInSec - 1).clamp(0, 300);
761
+ });
762
+ _settingsService.setSimLoopDelayInSec(_simLoopDelayInSec);
763
+ } : null,
764
+ icon: const Icon(Icons.remove),
765
+ ),
766
+ SizedBox(
767
+ width: 50,
768
+ child: Text(
769
+ '${_simLoopDelayInSec}s',
770
+ textAlign: TextAlign.center,
771
+ style: const TextStyle(fontWeight: FontWeight.bold),
772
+ ),
773
+ ),
774
+ IconButton(
775
+ onPressed: _simLoopDelayInSec < 300 ? () {
776
+ setState(() {
777
+ _simLoopDelayInSec = (_simLoopDelayInSec + 1).clamp(0, 300);
778
+ });
779
+ _settingsService.setSimLoopDelayInSec(_simLoopDelayInSec);
780
+ } : null,
781
+ icon: const Icon(Icons.add),
782
+ ),
783
+ ],
784
+ ),
785
+ ),
786
+ ),
787
+ ],
788
  const SizedBox(height: 16),
789
  // Clear device connections button
790
  ListTile(
lib/services/chat_service.dart CHANGED
@@ -146,8 +146,11 @@ class ChatService {
146
  }
147
 
148
  void _handleChatMessage(ChatMessage message) {
 
 
149
  // Only add messages if they're for the current room
150
  if (message.videoId == _currentRoomId) {
 
151
  _chatController.add(message);
152
 
153
  // Store this message in the recent messages for this room
 
146
  }
147
 
148
  void _handleChatMessage(ChatMessage message) {
149
+ debugPrint('CHAT_DEBUG: ChatService received message - videoId: ${message.videoId}, currentRoom: $_currentRoomId, content: "${message.content}"');
150
+
151
  // Only add messages if they're for the current room
152
  if (message.videoId == _currentRoomId) {
153
+ debugPrint('CHAT_DEBUG: Message matches current room, forwarding to controller');
154
  _chatController.add(message);
155
 
156
  // Store this message in the recent messages for this room
lib/services/clip_queue/clip_queue_manager.dart CHANGED
@@ -40,11 +40,7 @@ class ClipQueueManager {
40
  /// Timer for checking the buffer state
41
  Timer? _bufferCheckTimer;
42
 
43
- /// Timer for evolving the description
44
- Timer? _descriptionEvolutionTimer;
45
 
46
- /// Last time the description was evolved
47
- DateTime _lastDescriptionEvolutionTime = DateTime.now();
48
 
49
  /// Whether the manager is disposed
50
  bool _isDisposed = false;
@@ -88,20 +84,25 @@ class ClipQueueManager {
88
  );
89
 
90
  // Start listening to chat messages
 
91
  final chatService = ChatService();
92
  chatService.initialize().then((_) {
 
93
  chatService.joinRoom(videoId).then((_) {
 
94
  chatService.chatStream.listen(_addChatMessage);
95
  }).catchError((e) {
96
- debugPrint('ClipQueueManager: Error joining chat room: $e');
97
  });
98
  }).catchError((e) {
99
- debugPrint('ClipQueueManager: Error initializing chat service: $e');
100
  });
101
  }
102
 
103
  /// Add a chat message to the recent messages list
104
  void _addChatMessage(ChatMessage message) {
 
 
105
  if (message.videoId == videoId) {
106
  _recentChatMessages.add(message);
107
  // Keep only the 5 most recent messages
@@ -109,6 +110,9 @@ class ClipQueueManager {
109
  _recentChatMessages.removeAt(0);
110
  }
111
  ClipQueueConstants.logEvent('Added chat message: ${message.content.substring(0, min(20, message.content.length))}...');
112
  }
113
  }
114
 
@@ -156,9 +160,8 @@ class ClipQueueManager {
156
  );
157
  _clipBuffer.clear();
158
 
159
- // Reset evolution counter and last evolution time
160
  _evolutionCounter = 0;
161
- _lastDescriptionEvolutionTime = DateTime.now();
162
 
163
  // Set initial orientation
164
  _currentOrientation = orientation ?? getOrientationFromDimensions(
@@ -217,8 +220,8 @@ class ClipQueueManager {
217
 
218
  /// Start the simulation timer
219
  void _startDescriptionEvolution() {
220
- // Cancel any existing timer
221
- _descriptionEvolutionTimer?.cancel();
222
 
223
  // Check if simulation is enabled globally in config and from user settings
224
  final settingsService = SettingsService();
@@ -231,61 +234,13 @@ class ClipQueueManager {
231
  return;
232
  }
233
 
234
- if (Configuration.instance.simLoopFrequencyInSec <= 0) {
235
- debugPrint('SIMULATION: Disabled (frequency is 0)');
236
- ClipQueueConstants.logEvent('Simulation disabled (frequency is 0)');
237
- return;
238
- }
239
 
240
- debugPrint('SIMULATION: Starting simulation timer with settings: enableSimLoop=${Configuration.instance.enableSimLoop}, userSetting=${settingsService.enableSimulation}, frequency=${Configuration.instance.simLoopFrequencyInSec}s');
241
 
242
- // Adaptive check interval - less frequent checks to reduce overhead
243
- final checkInterval = max(3, Configuration.instance.simLoopFrequencyInSec ~/ 3);
244
 
245
- ClipQueueConstants.logEvent('Starting simulation with check interval of $checkInterval seconds');
246
-
247
- // Check periodically if it's time to simulate the video
248
- _descriptionEvolutionTimer = Timer.periodic(
249
- Duration(seconds: checkInterval),
250
- (timer) async {
251
- // debugPrint('SIMULATION: Timer check triggered');
252
- if (_isDisposed) {
253
- debugPrint('SIMULATION: Skipping because manager is disposed');
254
- return;
255
- }
256
-
257
- // Skip if simulation is paused (due to video playback being paused)
258
- if (_isSimulationPaused) {
259
- // debugPrint('SIMULATION: Skipping because it is paused');
260
- ClipQueueConstants.logEvent('Skipping simulation because it is paused');
261
- return;
262
- }
263
-
264
- // We previously delayed simulation if clips were being generated,
265
- // but since clip generation is constant, we'll now run them in parallel
266
- final isGenerating = _activeGenerations.isNotEmpty;
267
- if (isGenerating) {
268
- // debugPrint('SIMULATION: Continuing with simulation despite active generations');
269
- ClipQueueConstants.logEvent('Running simulation in parallel with active generations');
270
- // We no longer return early here
271
- }
272
-
273
- // Calculate time since last simulation
274
- final now = DateTime.now();
275
- final duration = now.difference(_lastDescriptionEvolutionTime);
276
- // debugPrint('SIMULATION: Time since last simulation: ${duration.inSeconds}s (frequency: ${Configuration.instance.simLoopFrequencyInSec}s)');
277
-
278
- // If we've waited long enough, simulate the video
279
- if (duration.inSeconds >= Configuration.instance.simLoopFrequencyInSec) {
280
- debugPrint('SIMULATION: Triggering simulation after ${duration.inSeconds} seconds');
281
- ClipQueueConstants.logEvent('Triggering simulation after ${duration.inSeconds} seconds');
282
- await _evolveDescription();
283
- _lastDescriptionEvolutionTime = now;
284
- } else {
285
- // debugPrint('SIMULATION: Not enough time elapsed since last simulation');
286
- }
287
- },
288
- );
289
  ClipQueueConstants.logEvent('Started simulation timer');
290
  }
291
 
@@ -304,11 +259,19 @@ class ClipQueueManager {
304
 
305
  // Function to get chat message string
306
  String getChatMessagesString() {
307
- if (_recentChatMessages.isEmpty) return '';
308
 
309
- return _recentChatMessages.map((msg) =>
310
  "${msg.username}: ${msg.content}"
311
  ).join("\n");
312
  }
313
 
314
  while (retryCount <= maxRetries) {
@@ -377,6 +340,49 @@ class ClipQueueManager {
377
  }
378
  }
379
 
380
  /// Mark a specific clip as played
381
  void markClipAsPlayed(String clipId) {
382
  _logger.logStateChange(
@@ -638,11 +644,8 @@ class ClipQueueManager {
638
  : 'Simulation resumed (video playback resumed)'
639
  );
640
 
641
- // If we're resuming after a pause, update the last evolution time
642
- // to avoid immediate evolution after resuming
643
- if (!isPaused) {
644
- _lastDescriptionEvolutionTime = DateTime.now();
645
- }
646
  }
647
 
648
  /// Print the current state of the queue
@@ -671,7 +674,6 @@ class ClipQueueManager {
671
 
672
  // Cancel all timers first
673
  _bufferCheckTimer?.cancel();
674
- _descriptionEvolutionTimer?.cancel();
675
 
676
  // Complete any pending generation completers
677
  for (var clip in _clipBuffer) {
 
40
  /// Timer for checking the buffer state
41
  Timer? _bufferCheckTimer;
42
 
 
 
43
 
 
 
44
 
45
  /// Whether the manager is disposed
46
  bool _isDisposed = false;
 
84
  );
85
 
86
  // Start listening to chat messages
87
+ debugPrint('CHAT_DEBUG: ClipQueueManager initializing chat service for video $videoId');
88
  final chatService = ChatService();
89
  chatService.initialize().then((_) {
90
+ debugPrint('CHAT_DEBUG: ChatService initialized, joining room $videoId');
91
  chatService.joinRoom(videoId).then((_) {
92
+ debugPrint('CHAT_DEBUG: Joined chat room, setting up message listener');
93
  chatService.chatStream.listen(_addChatMessage);
94
  }).catchError((e) {
95
+ debugPrint('CHAT_DEBUG: Error joining chat room: $e');
96
  });
97
  }).catchError((e) {
98
+ debugPrint('CHAT_DEBUG: Error initializing chat service: $e');
99
  });
100
  }
101
 
102
  /// Add a chat message to the recent messages list
103
  void _addChatMessage(ChatMessage message) {
104
+ debugPrint('CHAT_DEBUG: ClipQueueManager received message - videoId: ${message.videoId}, expected: $videoId, content: "${message.content}"');
105
+
106
  if (message.videoId == videoId) {
107
  _recentChatMessages.add(message);
108
  // Keep only the 5 most recent messages
 
110
  _recentChatMessages.removeAt(0);
111
  }
112
  ClipQueueConstants.logEvent('Added chat message: ${message.content.substring(0, min(20, message.content.length))}...');
113
+ debugPrint('CHAT_DEBUG: Added message to queue manager, total messages: ${_recentChatMessages.length}');
114
+ } else {
115
+ debugPrint('CHAT_DEBUG: Message videoId mismatch - ignoring message');
116
  }
117
  }
118
 
 
160
  );
161
  _clipBuffer.clear();
162
 
163
+ // Reset evolution counter
164
  _evolutionCounter = 0;
 
165
 
166
  // Set initial orientation
167
  _currentOrientation = orientation ?? getOrientationFromDimensions(
 
220
 
221
  /// Start the simulation timer
222
  void _startDescriptionEvolution() {
223
+ // Cancel any existing simulation loop by setting the disposed flag
224
+ // The _runSimulationLoop method will check _isDisposed and exit gracefully
225
 
226
  // Check if simulation is enabled globally in config and from user settings
227
  final settingsService = SettingsService();
 
234
  return;
235
  }
236
 
 
 
 
 
 
237
 
238
+ debugPrint('SIMULATION: Starting simulation with settings: enableSimLoop=${Configuration.instance.enableSimLoop}, userSetting=${settingsService.enableSimulation}, delay=${settingsService.simLoopDelayInSec}s');
239
 
240
+ ClipQueueConstants.logEvent('Starting simulation loop with delay of ${settingsService.simLoopDelayInSec} seconds');
 
241
 
242
+ // Start the simulation loop immediately
243
+ _runSimulationLoop();
244
  ClipQueueConstants.logEvent('Started simulation timer');
245
  }
246
 
 
259
 
260
  // Function to get chat message string
261
  String getChatMessagesString() {
262
+ debugPrint('CHAT_DEBUG: Getting chat messages for simulation - count: ${_recentChatMessages.length}');
263
+
264
+ if (_recentChatMessages.isEmpty) {
265
+ debugPrint('CHAT_DEBUG: No chat messages available for simulation');
266
+ return '';
267
+ }
268
 
269
+ final messagesString = _recentChatMessages.map((msg) =>
270
  "${msg.username}: ${msg.content}"
271
  ).join("\n");
272
+
273
+ debugPrint('CHAT_DEBUG: Chat messages for simulation: $messagesString');
274
+ return messagesString;
275
  }
276
 
277
  while (retryCount <= maxRetries) {
 
340
  }
341
  }
342
 
343
+ /// Run the simulation loop with delay-based approach
344
+ Future<void> _runSimulationLoop() async {
345
+ while (!_isDisposed) {
346
+ try {
347
+ // Skip if simulation is paused (due to video playback being paused)
348
+ if (_isSimulationPaused) {
349
+ await Future.delayed(const Duration(seconds: 1));
350
+ continue;
351
+ }
352
+
353
+ // Run the simulation
354
+ debugPrint('SIMULATION: Starting simulation iteration');
355
+ ClipQueueConstants.logEvent('Starting simulation iteration');
356
+
357
+ final simulationStart = DateTime.now();
358
+ await _evolveDescription();
359
+ final simulationEnd = DateTime.now();
360
+ final simulationDuration = simulationEnd.difference(simulationStart);
361
+
362
+ debugPrint('SIMULATION: Completed simulation in ${simulationDuration.inMilliseconds}ms');
363
+ ClipQueueConstants.logEvent('Completed simulation in ${simulationDuration.inMilliseconds}ms');
364
+
365
+ // Add the user-configured delay after simulation
366
+ final settingsService = SettingsService();
367
+ final delaySeconds = settingsService.simLoopDelayInSec;
368
+ debugPrint('SIMULATION: Waiting ${delaySeconds}s before next simulation');
369
+ ClipQueueConstants.logEvent('Waiting ${delaySeconds}s before next simulation');
370
+
371
+ await Future.delayed(Duration(seconds: delaySeconds));
372
+
373
+ } catch (e) {
374
+ debugPrint('SIMULATION: Error in simulation loop: $e');
375
+ ClipQueueConstants.logEvent('Error in simulation loop: $e');
376
+
377
+ // Wait a bit before retrying to avoid tight error loops
378
+ await Future.delayed(const Duration(seconds: 5));
379
+ }
380
+ }
381
+
382
+ debugPrint('SIMULATION: Simulation loop ended');
383
+ ClipQueueConstants.logEvent('Simulation loop ended');
384
+ }
385
+
386
  /// Mark a specific clip as played
387
  void markClipAsPlayed(String clipId) {
388
  _logger.logStateChange(
 
644
  : 'Simulation resumed (video playback resumed)'
645
  );
646
 
647
+ // Note: With the delay-based approach, simulation timing is handled
648
+ // internally by the _runSimulationLoop method
649
  }
650
 
651
  /// Print the current state of the queue
 
674
 
675
  // Cancel all timers first
676
  _bufferCheckTimer?.cancel();
 
677
 
678
  // Complete any pending generation completers
679
  for (var clip in _clipBuffer) {
lib/services/settings_service.dart CHANGED
@@ -9,6 +9,8 @@ class SettingsService {
9
  static const String _negativePromptKey = 'negative_video_prompt';
10
  static const String _showSceneDebugInfoKey = 'show_scene_debug_info';
11
  static const String _enableSimulationKey = 'enable_simulation';
 
 
12
  static const String _llmProviderKey = 'llm_provider';
13
  static const String _llmModelKey = 'llm_model';
14
  static const String _llmApiKeyKey = 'llm_api_key';
@@ -61,6 +63,20 @@ class SettingsService {
61
  _settingsController.add(null);
62
  }
63
 
  String get llmProvider => _prefs.getString(_llmProviderKey) ?? 'built-in';
65
 
66
  Future<void> setLlmProvider(String provider) async {
 
9
  static const String _negativePromptKey = 'negative_video_prompt';
10
  static const String _showSceneDebugInfoKey = 'show_scene_debug_info';
11
  static const String _enableSimulationKey = 'enable_simulation';
12
+ static const String _simLoopDelayKey = 'sim_loop_delay_in_sec';
13
+ static const String _gameMasterPromptKey = 'game_master_prompt';
14
  static const String _llmProviderKey = 'llm_provider';
15
  static const String _llmModelKey = 'llm_model';
16
  static const String _llmApiKeyKey = 'llm_api_key';
 
63
  _settingsController.add(null);
64
  }
65
 
66
+ int get simLoopDelayInSec => _prefs.getInt(_simLoopDelayKey) ?? 5;
67
+
68
+ Future<void> setSimLoopDelayInSec(int value) async {
69
+ await _prefs.setInt(_simLoopDelayKey, value);
70
+ _settingsController.add(null);
71
+ }
72
+
73
+ String get gameMasterPrompt => _prefs.getString(_gameMasterPromptKey) ?? '';
74
+
75
+ Future<void> setGameMasterPrompt(String value) async {
76
+ await _prefs.setString(_gameMasterPromptKey, value);
77
+ _settingsController.add(null);
78
+ }
79
+
80
  String get llmProvider => _prefs.getString(_llmProviderKey) ?? 'built-in';
81
 
82
  Future<void> setLlmProvider(String provider) async {
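The two new preferences persist through SharedPreferences like the existing keys. A minimal sketch of reading and writing them via the members added above, assuming the service has already been initialized; the values shown are illustrative:

    // Sketch only: exercises simLoopDelayInSec and gameMasterPrompt from this commit.
    Future<void> configureSimulation() async {
      final settings = SettingsService();
      await settings.setSimLoopDelayInSec(10); // the settings UI clamps this to 0-300
      await settings.setGameMasterPrompt('Keep things fun and kid-friendly.');
      debugPrint('delay=${settings.simLoopDelayInSec}s, '
          'gameMaster="${settings.gameMasterPrompt}"');
    }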
lib/services/websocket_api_service.dart CHANGED
@@ -15,6 +15,7 @@ import 'package:web_socket_channel/web_socket_channel.dart';
15
  import '../models/search_state.dart';
16
  import '../models/video_result.dart';
17
  import '../models/video_orientation.dart';
 
18
 
19
  class WebSocketRequest {
20
  final String requestId;
@@ -49,6 +50,9 @@ class WebSocketApiService {
49
  factory WebSocketApiService() => _instance;
50
  WebSocketApiService._internal();
51
 
52
  // Dynamically build WebSocket URL based on current host in web platform
53
  // or use environment variable/production URL/localhost for development on other platforms
54
  static String get _wsUrl {
@@ -60,13 +64,13 @@ class WebSocketApiService {
60
  // For localhost, explicitly include port 8080
61
  if (location.host == 'localhost' || location.host.startsWith('localhost:')) {
62
  final url = '$protocol://localhost:8080/ws';
63
- debugPrint('WebSocketApiService: Using localhost:8080 WebSocket URL: $url');
64
  return url;
65
  }
66
 
67
  // For other hosts, include the original port number in the URL
68
  final url = '$protocol://${location.host}/ws';
69
- debugPrint('WebSocketApiService: Using dynamic WebSocket URL: $url');
70
  return url;
71
  } else {
72
  // First try to get WebSocket URL from environment variable (highest priority)
@@ -143,7 +147,7 @@ class WebSocketApiService {
143
  if (_initialized) return;
144
 
145
  try {
146
- debugPrint('WebSocketApiService: Initializing and connecting...');
147
 
148
  // Add page unload handler for web platform
149
  if (kIsWeb) {
@@ -215,7 +219,7 @@ class WebSocketApiService {
215
  if (response['success'] == true && response['user_role'] != null) {
216
  _userRole = response['user_role'] as String;
217
  _userRoleController.add(_userRole);
218
- debugPrint('WebSocketApiService: User role set to $_userRole');
219
 
220
  // Now that we know the role, check device connection limit for non-anonymous users
221
  if (kIsWeb && _userRole != 'anon') {
@@ -892,6 +896,7 @@ class WebSocketApiService {
892
  'model': llmModel,
893
  'api_key': llmApiKey,
894
  'hf_token': hfApiKey,
 
895
  },
896
  },
897
  ),
@@ -1030,7 +1035,7 @@ class WebSocketApiService {
1030
  final action = data['action'] as String?;
1031
  final requestId = data['requestId'] as String?;
1032
 
1033
- debugPrint('WebSocketApiService: Received message for action: $action, requestId: $requestId');
1034
 
1035
  // Update user role if present in response (from heartbeat or get_user_role)
1036
  if (data['user_role'] != null) {
@@ -1233,13 +1238,15 @@ class WebSocketApiService {
1233
 
1234
  try {
1235
  final requestData = request.toJson();
1236
- debugPrint('WebSocketApiService: Sending request ${request.requestId} (${request.action}): ${json.encode(requestData)}');
 
 
1237
  _channel!.sink.add(json.encode(requestData));
1238
 
1239
  final response = await completer.future.timeout(
1240
  timeout ?? const Duration(seconds: 10),
1241
  onTimeout: () {
1242
- debugPrint('WebSocketApiService: Request ${request.requestId} timed out');
1243
  _cleanup(request.requestId);
1244
  throw TimeoutException('Request timeout');
1245
  },
@@ -1280,6 +1287,7 @@ class WebSocketApiService {
1280
  'provider': llmProvider,
1281
  'model': llmModel,
1282
  'api_key': llmApiKey,
 
1283
  },
1284
  },
1285
  ),
@@ -1308,7 +1316,7 @@ class WebSocketApiService {
1308
  int height = 320,
1309
  int width = 512,
1310
  int seed = 0,
1311
- Duration timeout = const Duration(seconds: 12), // we keep things super tight, as normally a video only takes 2~3s to generate
1312
  VideoOrientation orientation = VideoOrientation.LANDSCAPE,
1313
  }) async {
1314
  final settings = SettingsService();
@@ -1362,6 +1370,7 @@ class WebSocketApiService {
1362
  'provider': llmProvider,
1363
  'model': llmModel,
1364
  'api_key': llmApiKey,
 
1365
  },
1366
  },
1367
  ),
@@ -1395,6 +1404,10 @@ class WebSocketApiService {
1395
  }
1396
 
1397
  debugPrint('WebSocketApiService: Sending simulation request for video $videoId (evolution #$evolutionCount)');
1398
 
1399
  try {
1400
  // If chat messages are provided directly, use them; otherwise the default empty string is used
@@ -1444,6 +1457,7 @@ class WebSocketApiService {
1444
  'provider': llmProvider,
1445
  'model': llmModel,
1446
  'api_key': llmApiKey,
 
1447
  },
1448
  },
1449
  ),
 
15
  import '../models/search_state.dart';
16
  import '../models/video_result.dart';
17
  import '../models/video_orientation.dart';
18
+ import '../utils/colored_logger.dart';
19
 
20
  class WebSocketRequest {
21
  final String requestId;
 
50
  factory WebSocketApiService() => _instance;
51
  WebSocketApiService._internal();
52
 
53
+ // Colored logger
54
+ final _log = ColoredLogger.get('WebSocketApiService');
55
+
56
  // Dynamically build WebSocket URL based on current host in web platform
57
  // or use environment variable/production URL/localhost for development on other platforms
58
  static String get _wsUrl {
 
64
  // For localhost, explicitly include port 8080
65
  if (location.host == 'localhost' || location.host.startsWith('localhost:')) {
66
  final url = '$protocol://localhost:8080/ws';
67
+ ColoredLogger.get('WebSocketApiService').network('Using localhost:8080 WebSocket URL: $url');
68
  return url;
69
  }
70
 
71
  // For other hosts, include the original port number in the URL
72
  final url = '$protocol://${location.host}/ws';
73
+ ColoredLogger.get('WebSocketApiService').network('Using dynamic WebSocket URL: $url');
74
  return url;
75
  } else {
76
  // First try to get WebSocket URL from environment variable (highest priority)
 
147
  if (_initialized) return;
148
 
149
  try {
150
+ _log.info('Initializing and connecting...');
151
 
152
  // Add page unload handler for web platform
153
  if (kIsWeb) {
 
219
  if (response['success'] == true && response['user_role'] != null) {
220
  _userRole = response['user_role'] as String;
221
  _userRoleController.add(_userRole);
222
+ _log.success('User role set to $_userRole');
223
 
224
  // Now that we know the role, check device connection limit for non-anonymous users
225
  if (kIsWeb && _userRole != 'anon') {
 
896
  'model': llmModel,
897
  'api_key': llmApiKey,
898
  'hf_token': hfApiKey,
899
+ 'game_master_prompt': settings.gameMasterPrompt,
900
  },
901
  },
902
  ),
 
1035
  final action = data['action'] as String?;
1036
  final requestId = data['requestId'] as String?;
1037
 
1038
+ _log.websocket('Received message for action: $action, requestId: [$requestId]');
1039
 
1040
  // Update user role if present in response (from heartbeat or get_user_role)
1041
  if (data['user_role'] != null) {
 
1238
 
1239
  try {
1240
  final requestData = request.toJson();
1241
+ _log.websocket('Sending request [${request.requestId}] (${request.action})', {
1242
+ 'data': json.encode(requestData)
1243
+ });
1244
  _channel!.sink.add(json.encode(requestData));
1245
 
1246
  final response = await completer.future.timeout(
1247
  timeout ?? const Duration(seconds: 10),
1248
  onTimeout: () {
1249
+ _log.error('Request [${request.requestId}] timed out');
1250
  _cleanup(request.requestId);
1251
  throw TimeoutException('Request timeout');
1252
  },
 
1287
  'provider': llmProvider,
1288
  'model': llmModel,
1289
  'api_key': llmApiKey,
1290
+ 'game_master_prompt': settings.gameMasterPrompt,
1291
  },
1292
  },
1293
  ),
 
1316
  int height = 320,
1317
  int width = 512,
1318
  int seed = 0,
1319
+ Duration timeout = const Duration(seconds: 8), // we keep things super tight to fail quickly, as normally a video only takes 2 seconds to generate (including the transatlantic round trip)
1320
  VideoOrientation orientation = VideoOrientation.LANDSCAPE,
1321
  }) async {
1322
  final settings = SettingsService();
 
1370
  'provider': llmProvider,
1371
  'model': llmModel,
1372
  'api_key': llmApiKey,
1373
+ 'game_master_prompt': settings.gameMasterPrompt,
1374
  },
1375
  },
1376
  ),
 
1404
  }
1405
 
1406
  debugPrint('WebSocketApiService: Sending simulation request for video $videoId (evolution #$evolutionCount)');
1407
+ debugPrint('CHAT_DEBUG: WebSocket simulate() called with chatMessages length: ${chatMessages.length}');
1408
+ if (chatMessages.isNotEmpty) {
1409
+ debugPrint('CHAT_DEBUG: Chat messages content: $chatMessages');
1410
+ }
1411
 
1412
  try {
1413
  // If chat messages are provided directly, use them; otherwise the default empty string is used
 
1457
  'provider': llmProvider,
1458
  'model': llmModel,
1459
  'api_key': llmApiKey,
1460
+ 'game_master_prompt': settings.gameMasterPrompt,
1461
  },
1462
  },
1463
  ),
lib/utils/colored_logger.dart ADDED
@@ -0,0 +1,270 @@
1
+ import 'dart:developer' as developer;
2
+ import 'package:flutter/foundation.dart';
3
+
4
+ /// ANSI color codes for terminal output
5
+ class AnsiColors {
6
+ static const String reset = '\x1B[0m';
7
+ static const String bold = '\x1B[1m';
8
+ static const String dim = '\x1B[2m';
9
+ static const String italic = '\x1B[3m';
10
+ static const String underline = '\x1B[4m';
11
+
12
+ // Foreground colors
13
+ static const String black = '\x1B[30m';
14
+ static const String red = '\x1B[31m';
15
+ static const String green = '\x1B[32m';
16
+ static const String yellow = '\x1B[33m';
17
+ static const String blue = '\x1B[34m';
18
+ static const String magenta = '\x1B[35m';
19
+ static const String cyan = '\x1B[36m';
20
+ static const String white = '\x1B[37m';
21
+
22
+ // Bright foreground colors
23
+ static const String brightBlack = '\x1B[90m';
24
+ static const String brightRed = '\x1B[91m';
25
+ static const String brightGreen = '\x1B[92m';
26
+ static const String brightYellow = '\x1B[93m';
27
+ static const String brightBlue = '\x1B[94m';
28
+ static const String brightMagenta = '\x1B[95m';
29
+ static const String brightCyan = '\x1B[96m';
30
+ static const String brightWhite = '\x1B[97m';
31
+
32
+ // Background colors
33
+ static const String bgBlack = '\x1B[40m';
34
+ static const String bgRed = '\x1B[41m';
35
+ static const String bgGreen = '\x1B[42m';
36
+ static const String bgYellow = '\x1B[43m';
37
+ static const String bgBlue = '\x1B[44m';
38
+ static const String bgMagenta = '\x1B[45m';
39
+ static const String bgCyan = '\x1B[46m';
40
+ static const String bgWhite = '\x1B[47m';
41
+ }
42
+
43
+ /// Log levels with associated colors and emojis
44
+ enum LogLevel {
45
+ debug(AnsiColors.brightBlack, '🔍', 'DEBUG'),
46
+ info(AnsiColors.brightCyan, '💡', 'INFO'),
47
+ warning(AnsiColors.brightYellow, '⚠️', 'WARN'),
48
+ error(AnsiColors.brightRed, '❌', 'ERROR'),
49
+ success(AnsiColors.brightGreen, '✅', 'SUCCESS'),
50
+ network(AnsiColors.brightMagenta, '🌐', 'NET'),
51
+ websocket(AnsiColors.cyan, '🔌', 'WS'),
52
+ video(AnsiColors.brightBlue, '🎬', 'VIDEO'),
53
+ chat(AnsiColors.green, '💬', 'CHAT'),
54
+ search(AnsiColors.yellow, '🔍', 'SEARCH');
55
+
56
+ const LogLevel(this.color, this.emoji, this.label);
57
+
58
+ final String color;
59
+ final String emoji;
60
+ final String label;
61
+ }
62
+
63
+ /// Beautiful colored logger for Flutter applications
64
+ class ColoredLogger {
65
+ final String _className;
66
+
67
+ ColoredLogger(this._className);
68
+
69
+ /// Create a logger for a specific class
70
+ static ColoredLogger get(String className) {
71
+ return ColoredLogger(className);
72
+ }
73
+
74
+ /// Debug level logging - for detailed debugging info
75
+ void debug(String message, [Map<String, dynamic>? data]) {
76
+ _log(LogLevel.debug, message, data);
77
+ }
78
+
79
+ /// Info level logging - for general information
80
+ void info(String message, [Map<String, dynamic>? data]) {
81
+ _log(LogLevel.info, message, data);
82
+ }
83
+
84
+ /// Warning level logging - for potential issues
85
+ void warning(String message, [Map<String, dynamic>? data]) {
86
+ _log(LogLevel.warning, message, data);
87
+ }
88
+
89
+ /// Error level logging - for errors and exceptions
90
+ void error(String message, [Map<String, dynamic>? data]) {
91
+ _log(LogLevel.error, message, data);
92
+ }
93
+
94
+ /// Success level logging - for successful operations
95
+ void success(String message, [Map<String, dynamic>? data]) {
96
+ _log(LogLevel.success, message, data);
97
+ }
98
+
99
+ /// Network level logging - for network operations
100
+ void network(String message, [Map<String, dynamic>? data]) {
101
+ _log(LogLevel.network, message, data);
102
+ }
103
+
104
+ /// WebSocket level logging - for WebSocket operations
105
+ void websocket(String message, [Map<String, dynamic>? data]) {
106
+ _log(LogLevel.websocket, message, data);
107
+ }
108
+
109
+ /// Video level logging - for video generation operations
110
+ void video(String message, [Map<String, dynamic>? data]) {
111
+ _log(LogLevel.video, message, data);
112
+ }
113
+
114
+ /// Chat level logging - for chat operations
115
+ void chat(String message, [Map<String, dynamic>? data]) {
116
+ _log(LogLevel.chat, message, data);
117
+ }
118
+
119
+ /// Search level logging - for search operations
120
+ void search(String message, [Map<String, dynamic>? data]) {
121
+ _log(LogLevel.search, message, data);
122
+ }
123
+
124
+ void _log(LogLevel level, String message, Map<String, dynamic>? data) {
125
+ if (!kDebugMode) return; // Only log in debug mode
126
+
127
+ final timestamp = DateTime.now();
128
+ final timeStr = '${timestamp.hour.toString().padLeft(2, '0')}:'
129
+ '${timestamp.minute.toString().padLeft(2, '0')}:'
130
+ '${timestamp.second.toString().padLeft(2, '0')}.'
131
+ '${timestamp.millisecond.toString().padLeft(3, '0')}';
132
+
133
+ // Format the main log message with colors
134
+ final coloredMessage = _colorizeMessage(message);
135
+
136
+ // Build the log line
137
+ final logLine = StringBuffer();
138
+
139
+ // Timestamp (dim)
140
+ logLine.write('${AnsiColors.dim}$timeStr${AnsiColors.reset} ');
141
+
142
+ // Level with color and emoji
143
+ logLine.write('${level.color}${level.emoji} ${level.label.padRight(7)}${AnsiColors.reset} ');
144
+
145
+ // Class name (bright black)
146
+ logLine.write('${AnsiColors.brightBlack}[$_className]${AnsiColors.reset} ');
147
+
148
+ // Message
149
+ logLine.write(coloredMessage);
150
+
151
+ // Add data if provided
152
+ if (data != null && data.isNotEmpty) {
153
+ logLine.write(' ${AnsiColors.dim}${_formatData(data)}${AnsiColors.reset}');
154
+ }
155
+
156
+ // Use developer.log for better IDE integration
157
+ developer.log(
158
+ logLine.toString(),
159
+ name: _className,
160
+ level: _getLevelValue(level),
161
+ );
162
+ }
163
+
164
+ String _colorizeMessage(String message) {
165
+ String result = message;
166
+
167
+ // Highlight request IDs in brackets
168
+ result = result.replaceAllMapped(
169
+ RegExp(r'\[([a-zA-Z0-9-]+)\]'),
170
+ (match) => '${AnsiColors.brightGreen}[${match.group(1)}]${AnsiColors.reset}',
171
+ );
172
+
173
+ // Highlight user IDs
174
+ result = result.replaceAllMapped(
175
+ RegExp(r'\buser ([a-zA-Z0-9-]+)'),
176
+ (match) => 'user ${AnsiColors.brightBlue}${match.group(1)}${AnsiColors.reset}',
177
+ );
178
+
179
+ // Highlight actions
180
+ result = result.replaceAllMapped(
181
+ RegExp(r'\b(generate_video|search|simulate|join_chat|leave_chat|chat_message|connect|disconnect)\b'),
182
+ (match) => '${AnsiColors.brightYellow}${match.group(1)}${AnsiColors.reset}',
183
+ );
184
+
185
+ // Highlight status keywords
186
+ result = result.replaceAllMapped(
187
+ RegExp(r'\b(success|successful|completed|connected|ready|ok)\b', caseSensitive: false),
188
+ (match) => '${AnsiColors.brightGreen}${match.group(1)}${AnsiColors.reset}',
189
+ );
190
+
191
+ result = result.replaceAllMapped(
192
+ RegExp(r'\b(error|failed|timeout|exception|crash)\b', caseSensitive: false),
193
+ (match) => '${AnsiColors.brightRed}${match.group(1)}${AnsiColors.reset}',
194
+ );
195
+
196
+ result = result.replaceAllMapped(
197
+ RegExp(r'\b(warning|retry|reconnect|fallback)\b', caseSensitive: false),
198
+ (match) => '${AnsiColors.brightYellow}${match.group(1)}${AnsiColors.reset}',
199
+ );
200
+
201
+ // Highlight numbers with units
202
+ result = result.replaceAllMapped(
203
+ RegExp(r'\b(\d+\.?\d*)(ms|s|MB|KB|bytes|chars|fps)?\b'),
204
+ (match) => '${AnsiColors.brightMagenta}${match.group(1)}${AnsiColors.cyan}${match.group(2) ?? ''}${AnsiColors.reset}',
205
+ );
206
+
207
+ // Highlight URLs
208
+ result = result.replaceAllMapped(
209
+ RegExp(r'https?://[^\s]+'),
210
+ (match) => '${AnsiColors.underline}${AnsiColors.brightCyan}${match.group(0)}${AnsiColors.reset}',
211
+ );
212
+
213
+ // Highlight JSON-like structures
214
+ result = result.replaceAllMapped(
215
+ RegExp(r'\{[^}]*\}'),
216
+ (match) => '${AnsiColors.dim}${match.group(0)}${AnsiColors.reset}',
217
+ );
218
+
219
+ // Highlight strings in quotes
220
+ result = result.replaceAllMapped(
221
+ RegExp(r'"([^"]*)"'),
222
+ (match) => '"${AnsiColors.green}${match.group(1)}${AnsiColors.reset}"',
223
+ );
224
+
225
+ return result;
226
+ }
227
+
228
+ String _formatData(Map<String, dynamic> data) {
229
+ final entries = data.entries.map((e) {
230
+ final key = e.key;
231
+ final value = e.value.toString();
232
+ return '${AnsiColors.cyan}$key${AnsiColors.reset}=${AnsiColors.brightWhite}$value${AnsiColors.reset}';
233
+ }).join(' ');
234
+
235
+ return '{$entries}';
236
+ }
237
+
238
+ int _getLevelValue(LogLevel level) {
239
+ switch (level) {
240
+ case LogLevel.debug:
241
+ return 500;
242
+ case LogLevel.info:
243
+ return 800;
244
+ case LogLevel.warning:
245
+ return 900;
246
+ case LogLevel.error:
247
+ return 1000;
248
+ case LogLevel.success:
249
+ return 800;
250
+ case LogLevel.network:
251
+ return 700;
252
+ case LogLevel.websocket:
253
+ return 700;
254
+ case LogLevel.video:
255
+ return 700;
256
+ case LogLevel.chat:
257
+ return 700;
258
+ case LogLevel.search:
259
+ return 700;
260
+ }
261
+ }
262
+ }
263
+
264
+ /// Extension methods for easy logging
265
+ extension ColoredLogging on Object {
266
+ ColoredLogger get log => ColoredLogger.get(runtimeType.toString());
267
+ }
268
+
269
+ /// Global logger instance for quick access
270
+ final appLog = ColoredLogger.get('App');
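A short usage sketch of the new logger, using only the API defined above; the class name, request id, and payload map are illustrative:

    // Sketch only: per-class logger plus the extension getter added above.
    final wsLog = ColoredLogger.get('WebSocketApiService');
    wsLog.info('Initializing and connecting...');
    wsLog.websocket('Sending request [req-42] (generate_video)', {'bytes': 2048});
    wsLog.error('Request [req-42] timed out');
    // From inside any class, the ColoredLogging extension exposes `log`:
    //   log.success('User role set to admin');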
lib/widgets/chat_widget.dart CHANGED
@@ -251,6 +251,7 @@ class _ChatWidgetState extends State<ChatWidget> {
251
  style: const TextStyle(color: TikSlopColors.onSurface),
252
  maxLength: 255,
253
  maxLines: 1,
 
254
  onChanged: (value) {
255
  // Enforce the character limit by trimming excess characters
256
  if (value.length > 255) {
@@ -261,7 +262,7 @@ class _ChatWidgetState extends State<ChatWidget> {
261
  }
262
  },
263
  decoration: InputDecoration(
264
- hintText: 'Chat with this tikslopr..',
265
  hintStyle: const TextStyle(color: TikSlopColors.onSurfaceVariant, fontSize: 16),
266
  border: OutlineInputBorder(
267
  borderRadius: BorderRadius.circular(12),
@@ -417,7 +418,7 @@ class _ChatWidgetState extends State<ChatWidget> {
417
  Icon(Icons.chat, color: TikSlopColors.onBackground),
418
  SizedBox(width: 8),
419
  Text(
420
- 'Simulation log',
421
  style: TextStyle(
422
  color: TikSlopColors.onBackground,
423
  fontSize: 16,
 
251
  style: const TextStyle(color: TikSlopColors.onSurface),
252
  maxLength: 255,
253
  maxLines: 1,
254
+ onSubmitted: (_) => _sendMessage(),
255
  onChanged: (value) {
256
  // Enforce the character limit by trimming excess characters
257
  if (value.length > 255) {
 
262
  }
263
  },
264
  decoration: InputDecoration(
265
+ hintText: 'Ask the game master!',
266
  hintStyle: const TextStyle(color: TikSlopColors.onSurfaceVariant, fontSize: 16),
267
  border: OutlineInputBorder(
268
  borderRadius: BorderRadius.circular(12),
 
418
  Icon(Icons.chat, color: TikSlopColors.onBackground),
419
  SizedBox(width: 8),
420
  Text(
421
+ 'World simulator log',
422
  style: TextStyle(
423
  color: TikSlopColors.onBackground,
424
  fontSize: 16,
server/README.md ADDED
@@ -0,0 +1,14 @@
1
+
2
+ server/
3
+ ├── __init__.py
4
+ ├── api_config.py # Configuration constants
5
+ ├── api_core.py # Main API class (now much cleaner!)
6
+ ├── api_metrics.py # Metrics functionality
7
+ ├── api_session.py # Session management
8
+ ├── chat.py # Chat room management
9
+ ├── config_utils.py # Configuration utilities
10
+ ├── endpoint_manager.py # Endpoint management with error handling
11
+ ├── llm_utils.py # LLM client and text generation
12
+ ├── models.py # Data models and types
13
+ ├── utils.py # Generic utilities (YAML parsing, etc.)
14
+ └── video_utils.py # Video generation (HF endpoints + Gradio)
server/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Server package initialization
2
+ """
3
+ This package contains the server-side components for the TikSlop API.
4
+ """
api_config.py β†’ server/api_config.py RENAMED
File without changes
api_core.py β†’ server/api_core.py RENAMED
@@ -4,396 +4,56 @@ import io
4
  import re
5
  import base64
6
  import uuid
7
- from typing import Dict, Any, Optional, List, Literal
8
- from dataclasses import dataclass
9
- from asyncio import Lock, Queue
10
  import asyncio
11
  import time
12
  import datetime
13
- from contextlib import asynccontextmanager
14
  from collections import defaultdict
15
  from aiohttp import web, ClientSession
16
- from huggingface_hub import InferenceClient, HfApi
17
  from gradio_client import Client
18
  import random
19
  import yaml
20
  import json
21
 
22
- from api_config import *
23
-
24
- # User role type
25
- UserRole = Literal['anon', 'normal', 'pro', 'admin']
26
-
27
- # Configure logging
28
- logging.basicConfig(
29
- level=logging.INFO,
30
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31
  )
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- def generate_seed():
36
- """Generate a random positive 32-bit integer seed."""
37
- return random.randint(0, 2**32 - 1)
38
-
39
- def sanitize_yaml_response(response_text: str) -> str:
40
- """
41
- Sanitize and format AI response into valid YAML.
42
- Returns properly formatted YAML string.
43
- """
44
-
45
- response_text = response_text.split("```")[0]
46
-
47
- # Remove any markdown code block indicators and YAML document markers
48
- clean_text = re.sub(r'```yaml|```|---|\.\.\.$', '', response_text.strip())
49
-
50
- # Split into lines and process each line
51
- lines = clean_text.split('\n')
52
- sanitized_lines = []
53
- current_field = None
54
-
55
- for line in lines:
56
- stripped = line.strip()
57
- if not stripped:
58
- continue
59
-
60
- # Handle field starts
61
- if stripped.startswith('title:') or stripped.startswith('description:'):
62
- # Ensure proper YAML format with space after colon and proper quoting
63
- field_name = stripped.split(':', 1)[0]
64
- field_value = stripped.split(':', 1)[1].strip().strip('"\'')
65
-
66
- # Quote the value if it contains special characters
67
- if any(c in field_value for c in ':[]{},&*#?|-<>=!%@`'):
68
- field_value = f'"{field_value}"'
69
-
70
- sanitized_lines.append(f"{field_name}: {field_value}")
71
- current_field = field_name
72
-
73
- elif stripped.startswith('tags:'):
74
- sanitized_lines.append('tags:')
75
- current_field = 'tags'
76
-
77
- elif stripped.startswith('-') and current_field == 'tags':
78
- # Process tag values
79
- tag = stripped[1:].strip().strip('"\'')
80
- if tag:
81
- # Clean and format tag
82
- tag = re.sub(r'[^\x00-\x7F]+', '', tag) # Remove non-ASCII
83
- tag = re.sub(r'[^a-zA-Z0-9\s-]', '', tag) # Keep only alphanumeric and hyphen
84
- tag = tag.strip().lower().replace(' ', '-')
85
- if tag:
86
- sanitized_lines.append(f" - {tag}")
87
-
88
- elif current_field in ['title', 'description']:
89
- # Handle multi-line title/description continuation
90
- value = stripped.strip('"\'')
91
- if value:
92
- # Append to previous line
93
- prev = sanitized_lines[-1]
94
- sanitized_lines[-1] = f"{prev} {value}"
95
-
96
- # Ensure the YAML has all required fields
97
- required_fields = {'title', 'description', 'tags'}
98
- found_fields = {line.split(':')[0].strip() for line in sanitized_lines if ':' in line}
99
-
100
- for field in required_fields - found_fields:
101
- if field == 'tags':
102
- sanitized_lines.extend(['tags:', ' - default'])
103
- else:
104
- sanitized_lines.append(f'{field}: "No {field} provided"')
105
-
106
- return '\n'.join(sanitized_lines)
107
-
108
- @dataclass
109
- class Endpoint:
110
- id: int
111
- url: str
112
- busy: bool = False
113
- last_used: float = 0
114
- error_count: int = 0
115
- error_until: float = 0 # Timestamp until which this endpoint is considered in error state
116
-
117
- class EndpointManager:
118
- def __init__(self):
119
- self.endpoints: List[Endpoint] = []
120
- self.lock = Lock()
121
- self.initialize_endpoints()
122
- self.last_used_index = -1 # Track the last used endpoint for round-robin
123
-
124
- def initialize_endpoints(self):
125
- """Initialize the list of endpoints"""
126
- for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
127
- endpoint = Endpoint(id=i + 1, url=url)
128
- self.endpoints.append(endpoint)
129
-
130
- def _get_next_free_endpoint(self):
131
- """Get the next available non-busy endpoint, or oldest endpoint if all are busy"""
132
- current_time = time.time()
133
-
134
- # First priority: Get any non-busy and non-error endpoint
135
- free_endpoints = [
136
- ep for ep in self.endpoints
137
- if not ep.busy and current_time > ep.error_until
138
- ]
139
-
140
- if free_endpoints:
141
- # Return the least recently used free endpoint
142
- return min(free_endpoints, key=lambda ep: ep.last_used)
143
-
144
- # Second priority: If all busy/error, use round-robin but skip error endpoints
145
- tried_count = 0
146
- next_index = self.last_used_index
147
-
148
- while tried_count < len(self.endpoints):
149
- next_index = (next_index + 1) % len(self.endpoints)
150
- tried_count += 1
151
-
152
- # If endpoint is not in error state, use it
153
- if current_time > self.endpoints[next_index].error_until:
154
- self.last_used_index = next_index
155
- return self.endpoints[next_index]
156
-
157
- # If all endpoints are in error state, use the one with earliest error expiry
158
- self.last_used_index = next_index
159
- return min(self.endpoints, key=lambda ep: ep.error_until)
160
-
161
- @asynccontextmanager
162
- async def get_endpoint(self, max_wait_time: int = 10):
163
- """Get the next available endpoint using a context manager"""
164
- start_time = time.time()
165
- endpoint = None
166
-
167
- try:
168
- while True:
169
- if time.time() - start_time > max_wait_time:
170
- raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
171
-
172
- async with self.lock:
173
- # Get the next available endpoint using our selection strategy
174
- endpoint = self._get_next_free_endpoint()
175
-
176
- # Mark it as busy
177
- endpoint.busy = True
178
- endpoint.last_used = time.time()
179
- #logger.info(f"Using endpoint {endpoint.id} (busy: {endpoint.busy}, last used: {endpoint.last_used})")
180
- break
181
-
182
- yield endpoint
183
-
184
- finally:
185
- if endpoint:
186
- async with self.lock:
187
- endpoint.busy = False
188
- endpoint.last_used = time.time()
189
- # We don't need to put back into queue - our strategy now picks directly from the list
190
 
191
- class ChatRoom:
192
- def __init__(self):
193
- self.messages = []
194
- self.connected_clients = set()
195
- self.max_history = 100
196
 
197
- def add_message(self, message):
198
- self.messages.append(message)
199
- if len(self.messages) > self.max_history:
200
- self.messages.pop(0)
201
 
202
- def get_recent_messages(self, limit=50):
203
- return self.messages[-limit:]
204
 
205
  class VideoGenerationAPI:
206
  def __init__(self):
207
  self.hf_api = HfApi(token=HF_TOKEN)
208
  self.endpoint_manager = EndpointManager()
209
  self.active_requests: Dict[str, asyncio.Future] = {}
210
- self.chat_rooms = defaultdict(ChatRoom)
211
  self.video_events: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
212
  self.event_history_limit = 50
213
  # Cache for user roles to avoid repeated API calls
214
  self.user_role_cache: Dict[str, Dict[str, Any]] = {}
215
  # Cache expiration time (10 minutes)
216
  self.cache_expiration = 600
217
-
218
- def _get_inference_client(self, llm_config: Optional[dict] = None) -> InferenceClient:
219
- """
220
- Get an InferenceClient configured with the provided LLM settings.
221
-
222
- Priority order for API keys:
223
- 1. Provider-specific API key (if provided)
224
- 2. User's HF token (if provided)
225
- 3. Server's HF token (only for built-in provider)
226
- 4. Raise exception if no valid key is available
227
- """
228
-
229
- if not llm_config:
230
- if HF_TOKEN:
231
- return InferenceClient(
232
- model=TEXT_MODEL,
233
- token=HF_TOKEN
234
- )
235
- else:
236
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
237
-
238
- provider = llm_config.get('provider', '').lower()
239
- logger.info(f"provider = {provider}")
240
-
241
- # If no provider or model specified, use default
242
- if not provider or provider == 'built-in':
243
- if HF_TOKEN:
244
- return InferenceClient(
245
- model=TEXT_MODEL,
246
- token=HF_TOKEN
247
- )
248
- else:
249
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
250
-
251
- model = llm_config.get('model', '')
252
- user_provider_api_key = llm_config.get('api_key', '') # Provider-specific API key
253
- user_hf_token = llm_config.get('hf_token', '') # User's HF token
254
-
255
- #logger.info(f"model = {model}")
256
- #logger.info(f"user_provider_api_key = {user_provider_api_key}")
257
- #logger.info(f"user_hf_token = {user_hf_token}")
258
-
259
- # If no provider or model specified, use default
260
- if not provider or provider == 'built-in':
261
- if HF_TOKEN:
262
- return InferenceClient(
263
- model=TEXT_MODEL,
264
- token=HF_TOKEN
265
- )
266
- else:
267
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
268
-
269
- try:
270
- # Case 1: Use a provider with a provider-specific API key if available
271
- # This mode is currently hidden in the Flutter UI (we don't ask for provider-specific keys yet)
272
- # but it is implemented here so that we don't forget it later
273
- if user_provider_api_key:
274
- return InferenceClient(
275
- provider=hf_provider,
276
- model=model,
277
- api_key=user_provider_api_key
278
- )
279
-
280
- # Case 2: Use a provider with user's HF token if available
281
- elif user_hf_token:
282
- return InferenceClient(
283
- provider=hf_provider,
284
- model=model,
285
- token=user_hf_token
286
- )
287
- #
288
- else:
289
- raise ValueError(f"No API key provided for provider '{provider}'. Please provide either a valid {provider} API key or your Hugging Face API key.")
290
-
291
- except ValueError:
292
- # Re-raise ValueError for missing API keys
293
- raise
294
- except Exception as e:
295
- logger.error(f"Error creating InferenceClient for provider '{provider}' and model '{model}': {e}")
296
- # Re-raise all other exceptions
297
- raise
298
-
299
- async def _generate_text(self, prompt: str, llm_config: Optional[dict] = None,
300
- max_new_tokens: int = 200, temperature: float = 0.7,
301
- model_override: Optional[str] = None) -> str:
302
- """
303
- Helper method to generate text using the appropriate client and configuration.
304
- Tries chat_completion first (modern standard), falls back to text_generation.
305
-
306
- Args:
307
- prompt: The prompt to generate text from
308
- llm_config: Optional LLM configuration dict
309
- max_new_tokens: Maximum number of new tokens to generate
310
- temperature: Temperature for generation
311
- model_override: Optional model to use instead of the one in llm_config
312
-
313
- Returns:
314
- Generated text string
315
- """
316
- # Get the appropriate client
317
- client = self._get_inference_client(llm_config)
318
-
319
- # Determine the model to use
320
- if model_override:
321
- model_to_use = model_override
322
- elif llm_config:
323
- model_to_use = llm_config.get('model', TEXT_MODEL)
324
- else:
325
- model_to_use = TEXT_MODEL
326
-
327
- # Try chat_completion first (modern standard, more widely supported)
328
- try:
329
- messages = [{"role": "user", "content": prompt}]
330
-
331
- if llm_config and llm_config.get('provider') != 'huggingface':
332
- # For third-party providers
333
- completion = await asyncio.get_event_loop().run_in_executor(
334
- None,
335
- lambda: client.chat.completions.create(
336
- messages=messages,
337
- max_tokens=max_new_tokens,
338
- temperature=temperature
339
- )
340
- )
341
- else:
342
- # For HuggingFace models, specify the model
343
- completion = await asyncio.get_event_loop().run_in_executor(
344
- None,
345
- lambda: client.chat.completions.create(
346
- model=model_to_use,
347
- messages=messages,
348
- max_tokens=max_new_tokens,
349
- temperature=temperature
350
- )
351
- )
352
-
353
- # Extract the generated text from the chat completion response
354
- return completion.choices[0].message.content
355
-
356
- except Exception as e:
357
- error_message = str(e).lower()
358
- # Check if the error is related to task compatibility or API not supported
359
- if ("not supported for task" in error_message or
360
- "conversational" in error_message or
361
- "chat" in error_message):
362
- logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
363
-
364
- # Fall back to text_generation API
365
- try:
366
- if llm_config and llm_config.get('provider') != 'huggingface':
367
- # For third-party providers
368
- response = await asyncio.get_event_loop().run_in_executor(
369
- None,
370
- lambda: client.text_generation(
371
- prompt,
372
- max_new_tokens=max_new_tokens,
373
- temperature=temperature
374
- )
375
- )
376
- else:
377
- # For HuggingFace models, specify the model
378
- response = await asyncio.get_event_loop().run_in_executor(
379
- None,
380
- lambda: client.text_generation(
381
- prompt,
382
- model=model_to_use,
383
- max_new_tokens=max_new_tokens,
384
- temperature=temperature
385
- )
386
- )
387
- return response
388
-
389
- except Exception as text_error:
390
- logger.error(f"Both chat_completion and text_generation failed: {text_error}")
391
- raise text_error
392
- else:
393
- # Re-raise the original error if it's not a task compatibility issue
394
- logger.error(f"chat_completion failed with non-compatibility error: {e}")
395
- raise e
396
-
397
 
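The removed helper above boils down to a "try chat_completion, fall back to text_generation" strategy. Here is a minimal synchronous sketch of that strategy, assuming a valid model id and token; the committed version additionally runs the calls in an executor and supports third-party providers:

from huggingface_hub import InferenceClient

def generate(prompt: str, model: str, token: str,
             max_new_tokens: int = 200, temperature: float = 0.7) -> str:
    client = InferenceClient(model=model, token=token)
    try:
        # Preferred path: the chat-completion interface most deployments expose.
        out = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_new_tokens,
            temperature=temperature,
        )
        return out.choices[0].message.content
    except Exception as exc:
        # Text-generation-only backends reject chat requests; retry with the raw task.
        msg = str(exc).lower()
        if "not supported for task" in msg or "conversational" in msg or "chat" in msg:
            return client.text_generation(
                prompt, max_new_tokens=max_new_tokens, temperature=temperature
            )
        raise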
398
  def _add_event(self, video_id: str, event: Dict[str, Any]):
399
  """Add an event to the video's history and maintain the size limit"""
@@ -490,32 +150,13 @@ class VideoGenerationAPI:
490
  temperature = random.uniform(0.68, 0.72)
491
 
492
  while current_attempt <= max_attempts:
493
- prompt = f"""# Instruction
494
- Your response MUST be a YAML object containing a title and description, consistent with what we can find on a video sharing platform.
495
- Format your YAML response with only those fields: "title" (a short string) and "description" (string caption of the scene). Do not add any other field.
496
- In the description field, describe in a very synthetic way the visuals of the first shot (first scene), eg "<STYLE>, medium close-up shot, high angle view. In the foreground a <OPTIONAL AGE> <OPTIONAL GENDER> <CHARACTERS> <ACTIONS>. In the background <DESCRIBE LOCATION, BACKGROUND CHARACTERS, OBJECTS ETC>. The scene is lit by <LIGHTING> <WEATHER>". This is just an example! you MUST replace the <TAGS>!!.
497
- Don't forget to replace <STYLE> etc, by the actual fields!!
498
- For the style, be creative, for instance you can use anything like a "documentary footage", "japanese animation", "movie scene", "tv series", "tv show", "security footage" etc.
499
- If the user asks for something specific eg "movie screencap", "movie scene", "documentary footage" "animation" as a style etc.
500
- Keep it minimalist but still descriptive, don't use bullets points, use simple words, go to the essential to describe style (cinematic, documentary footage, 3D rendering..), camera modes and angles, characters, age, gender, action, location, lighting, country, costume, time, weather, textures, color palette.. etc). Write about 80 words, and use between 2 and 3 sentences.
501
- The most important part is to describe the actions and movements in the scene, so don't forget that!
502
- Don't describe sound, so never say things like "atmospheric music playing in the background".
503
- Instead describe the visual elements we can see in the background, be precise, (if there are anything, cars, objects, people, bricks, birds, clouds, trees, leaves or grass then say it so etc).
504
- Make the result unique and different from previous search results. ONLY RETURN YAML AND WITH ENGLISH CONTENT, NOT CHINESE - DO NOT ADD ANY OTHER COMMENT!
505
-
506
- # Context
507
- This is attempt {current_attempt}.
508
-
509
- # Input
510
- Describe the first scene/shot for: "{query}".
511
-
512
- # Output
513
-
514
- ```yaml
515
- title: \""""
516
 
517
  try:
518
- raw_yaml_str = await self._generate_text(
519
  prompt,
520
  llm_config=llm_config,
521
  max_new_tokens=200,
@@ -526,31 +167,7 @@ title: \""""
526
 
527
  #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
528
 
529
- if raw_yaml_str.startswith("```yaml"):
530
- # Remove the "```yaml" at the beginning and closing ```
531
- raw_yaml_str = raw_yaml_str[7:] # Remove "```yaml" (7 characters)
532
- if raw_yaml_str.endswith("```"):
533
- raw_yaml_str = raw_yaml_str[:-3] # Remove closing ```
534
- raw_yaml_str = raw_yaml_str.strip()
535
- elif raw_yaml_str.startswith("```"):
536
- # Remove the "```" at the beginning and closing ```
537
- raw_yaml_str = raw_yaml_str[3:] # Remove opening ```
538
- if raw_yaml_str.endswith("```"):
539
- raw_yaml_str = raw_yaml_str[:-3] # Remove closing ```
540
- raw_yaml_str = raw_yaml_str.strip()
541
- else:
542
- raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', f"title: \"{raw_yaml_str}")
543
-
544
- # Check if it already has a proper YAML structure
545
- if not raw_yaml_str.startswith(('title:', 'title :')):
546
- # Only wrap with title if it doesn't already have one
547
- # Also escape any quotes in the string to prevent YAML parsing issues
548
- escaped_yaml = raw_yaml_str.replace('"', '\\"')
549
- raw_yaml_str = f'title: "{escaped_yaml}"'
550
- else:
551
- # If it already has title:, just clean it up
552
- raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', raw_yaml_str)
553
-
554
  sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
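The fence-stripping that used to live inline here is a small, testable transformation on its own. The snippet below sketches the same clean-up, assuming the model may wrap its answer in ``` or ```yaml fences; per the comment above, the committed code now performs this inside sanitize_yaml_response, and the helper name here is illustrative only:

import yaml

def parse_llm_yaml(raw: str) -> dict:
    text = raw.strip()
    for fence in ("```yaml", "```"):
        if text.startswith(fence):
            text = text[len(fence):]
            break
    if text.endswith("```"):
        text = text[:-3]
    return yaml.safe_load(text.strip())

print(parse_llm_yaml('```yaml\ntitle: "A quiet park"\ndescription: "Documentary footage..."\n```'))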
555
 
556
  try:
@@ -631,13 +248,12 @@ title: \""""
631
  async def generate_caption(self, title: str, description: str, llm_config: Optional[dict] = None) -> str:
632
  """Generate detailed caption using HF text generation"""
633
  try:
634
- prompt = f"""Generate a detailed story for a video named: "{title}"
635
- Visual description of the video: {description}.
636
- Instructions: Write the story summary, including the plot, action, what should happen.
637
- Make it around 200-300 words long.
638
- A video can be anything from a tutorial, webcam, trailer, movie, live stream etc."""
639
 
640
- response = await self._generate_text(
641
  prompt,
642
  llm_config=llm_config,
643
  max_new_tokens=180,
@@ -684,67 +300,32 @@ A video can be anything from a tutorial, webcam, trailer, movie, live stream etc
684
  # Create an appropriate prompt based on whether this is the first simulation
685
  chat_section = ""
686
  if chat_messages:
 
687
  chat_section = f"""
688
  People are watching this content right now and have shared their thoughts. Like a game master, please take their feedback as input to adjust the story and/or the scene. Here are their messages:
689
 
690
  {chat_messages}
691
  """
 
 
692
 
693
  if is_first_simulation:
694
- prompt = f"""You are tasked with evolving the narrative for a video titled: "{original_title}"
695
-
696
- Original description:
697
- {original_description}
698
- {chat_section}
699
-
700
- Instructions:
701
- 1. Imagine the next logical scene or development that would follow the current description.
702
- 2. Consider the video context and recent events
703
- 3. Create a natural progression from previous clips
704
- 4. Take into account user suggestions (chat messages) into the scene
705
- 5. IMPORTANT: viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
706
- 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
707
- 7. Return ONLY the caption text, no additional formatting or explanation
708
- 8. Write in English, about 200 words.
709
- 9. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc..from the previous description, when it makes sense).
710
- 10. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
711
- 11. Please write in the same style as the original description, by keeping things brief etc.
712
-
713
- Remember to obey to what users said in the chat history!!
714
-
715
- Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
716
  else:
717
- prompt = f"""You are tasked with continuing to evolve the narrative for a video titled: "{original_title}"
718
-
719
- Original description:
720
- {original_description}
721
-
722
- Condensed history of scenes so far:
723
- {condensed_history}
724
-
725
- Current description (most recent scene):
726
- {current_description}
727
- {chat_section}
728
-
729
- Instructions:
730
- 1. Imagine the next logical scene or development that would follow the current description.
731
- 2. Consider the video context and recent events
732
- 3. Create a natural progression from previous clips
733
- 4. Take into account user suggestions (chat messages) into the scene
734
- 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
735
- 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
736
- 7. Return ONLY the caption text, no additional formatting or explanation
737
- 8. Write in English, about 200 words.
738
- 9. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc..from the previous description, when it makes sense).
739
- 10. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
740
- 11. Please write in the same style as the original description, by keeping things brief etc.
741
-
742
- Remember to obey to what users said in the chat history!!
743
-
744
- Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
745
 
746
  # Generate the evolved description using the helper method
747
- response = await self._generate_text(
748
  prompt,
749
  llm_config=llm_config,
750
  max_new_tokens=240,
@@ -774,109 +355,25 @@ Now, you must write down the new scene description (don't write a long story! wr
774
  "condensed_history": condensed_history
775
  }
776
 
777
-
778
- def get_config_value(self, role: UserRole, field: str, options: dict = None) -> Any:
779
- """
780
- Get the appropriate config value for a user role.
781
-
782
- Args:
783
- role: The user role ('anon', 'normal', 'pro', 'admin')
784
- field: The config field name to retrieve
785
- options: Optional user-provided options that may override defaults
786
-
787
- Returns:
788
- The config value appropriate for the user's role with respect to
789
- min/max boundaries and user overrides.
790
- """
791
- # Select the appropriate config based on user role
792
- if role == 'admin':
793
- config = CONFIG_FOR_ADMIN_HF_USERS
794
- elif role == 'pro':
795
- config = CONFIG_FOR_PRO_HF_USERS
796
- elif role == 'normal':
797
- config = CONFIG_FOR_STANDARD_HF_USERS
798
- else: # Anonymous users
799
- config = CONFIG_FOR_ANONYMOUS_USERS
800
-
801
- # Get the default value for this field from the config
802
- default_value = config.get(f"default_{field}", None)
803
-
804
- # For fields that have min/max bounds
805
- min_field = f"min_{field}"
806
- max_field = f"max_{field}"
807
-
808
- # Check if min/max constraints exist for this field
809
- has_constraints = min_field in config or max_field in config
810
-
811
- if not has_constraints:
812
- # For fields without constraints, just return the value from config
813
- return default_value
814
-
815
- # Get min and max values from config (if they exist)
816
- min_value = config.get(min_field, None)
817
- max_value = config.get(max_field, None)
818
-
819
- # If user provided options with this field
820
- if options and field in options:
821
- user_value = options[field]
822
-
823
- # Apply constraints if they exist
824
- if min_value is not None and user_value < min_value:
825
- return min_value
826
- if max_value is not None and user_value > max_value:
827
- return max_value
828
-
829
- # If within bounds, use the user's value
830
- return user_value
831
-
832
- # If no user value, return the default
833
- return default_value
834
-
835
  async def _generate_clip_prompt(self, video_id: str, title: str, description: str) -> str:
836
  """Generate a new prompt for the next clip based on event history"""
837
  events = self.video_events.get(video_id, [])
838
  events_json = "\n".join(json.dumps(event) for event in events)
839
 
840
- prompt = f"""# Context and task
841
- Please write the caption for a new clip.
842
-
843
- # Instructions
844
- 1. Consider the video context and recent events
845
- 2. Create a natural progression from previous clips
846
- 3. Take into account user suggestions (chat messages) into the scene
847
- 4. Don't generate hateful, political, violent or sexual content
848
- 5. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
849
- 6. Return ONLY the caption text, no additional formatting or explanation
850
- 7. Write in English, about 200 words.
851
- 8. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc.. across scenes, when it makes sense).
852
- 8. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
853
-
854
- # Examples
855
- Here is a demo scenario, with fake data:
856
- {{"time": "2024-11-29T13:36:15Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
857
- {{"time": "2024-11-29T13:36:20Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "hi"}}
858
- {{"time": "2024-11-29T13:36:25Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "more squirrels plz"}}
859
- {{"time": "2024-11-29T13:36:26Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, a lot of squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
860
-
861
- # Real scenario and data
862
-
863
- We are inside a video titled "{title}"
864
- The video is described by: "{description}".
865
- Here is a summary of the {len(events)} most recent events:
866
- {events_json}
867
-
868
- # Your response
869
- Your caption:"""
870
 
871
  try:
872
- response = await asyncio.get_event_loop().run_in_executor(
873
- None,
874
- lambda: self.inference_client.text_generation(
875
- prompt,
876
- model=TEXT_MODEL,
877
- max_new_tokens=200,
878
- temperature=0.7
879
- )
880
  )
881
 
882
  # Clean up the response
@@ -952,7 +449,8 @@ Your caption:"""
952
 
953
  start_time = time.time()
954
  # Rest of thumbnail generation logic same as regular video but with optimized settings
955
- result = await self._generate_video_content_with_inference_endpoints(
 
956
  prompt=prompt,
957
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
958
  width=width,
@@ -1000,11 +498,11 @@ Your caption:"""
1000
  prompt = f"{clip_caption}, {POSITIVE_PROMPT_SUFFIX}"
1001
 
1002
  # Get the config values based on user role
1003
- width = self.get_config_value(user_role, 'clip_width', options)
1004
- height = self.get_config_value(user_role, 'clip_height', options)
1005
- num_frames = self.get_config_value(user_role, 'num_frames', options)
1006
- num_inference_steps = self.get_config_value(user_role, 'num_inference_steps', options)
1007
- frame_rate = self.get_config_value(user_role, 'clip_framerate', options)
1008
 
1009
  # Get orientation from options
1010
  orientation = options.get('orientation', 'LANDSCAPE')
@@ -1025,7 +523,8 @@ Your caption:"""
1025
  # Generate the video with standard settings
1026
  # historically we used _generate_video_content_with_inference_endpoints,
1027
  # which offers better performance and reliability, but costs were spinning out of control
1028
- return await self._generate_video_content_with_inference_endpoints(
 
1029
  prompt=prompt,
1030
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
1031
  width=width,
@@ -1037,267 +536,26 @@ Your caption:"""
1037
  options=options,
1038
  user_role=user_role
1039
  )
1040
-
1041
- async def _generate_video_content_with_inference_endpoints(self, prompt: str, negative_prompt: str, width: int,
1042
- height: int, num_frames: int, num_inference_steps: int,
1043
- frame_rate: int, seed: int, options: dict, user_role: UserRole) -> str:
1044
- """
1045
- Internal method to generate video content with specific parameters.
1046
- Used by both regular video generation and thumbnail generation.
1047
- """
1048
- is_thumbnail = options.get('thumbnail', False)
1049
- request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
1050
- video_id = options.get('video_id', 'unknown')
1051
-
1052
- # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
1053
-
1054
- json_payload = {
1055
- "inputs": {
1056
- "prompt": prompt,
1057
- },
1058
- "parameters": {
1059
- # ------------------- settings for LTX-Video -----------------------
1060
- "negative_prompt": negative_prompt,
1061
- "width": width,
1062
- "height": height,
1063
- "num_frames": num_frames,
1064
- "num_inference_steps": num_inference_steps,
1065
- "guidance_scale": options.get('guidance_scale', GUIDANCE_SCALE),
1066
- "seed": seed,
1067
-
1068
- # ------------------- settings for Varnish -----------------------
1069
- "double_num_frames": False, # <- False for real-time generation
1070
- "fps": frame_rate,
1071
- "super_resolution": False, # <- False for real-time generation
1072
- "grain_amount": 0, # No film grain (on low-res, low-quality generation the effects aren't worth it + it adds weight to the MP4 payload)
1073
- }
1074
- }
1075
-
1076
- # Add thumbnail flag to help with metrics and debugging
1077
- if is_thumbnail:
1078
- json_payload["metadata"] = {
1079
- "is_thumbnail": True,
1080
- "thumbnail_version": "1.0",
1081
- "request_id": request_id
1082
- }
1083
-
1084
- # logger.info(f"[{request_id}] Waiting for an available endpoint...")
1085
- async with self.endpoint_manager.get_endpoint() as endpoint:
1086
- # logger.info(f"[{request_id}] Using endpoint {endpoint.id} for generation")
1087
-
1088
- try:
1089
- async with ClientSession() as session:
1090
- #logger.info(f"[{request_id}] Sending request to endpoint {endpoint.id}: {endpoint.url}")
1091
- start_time = time.time()
1092
-
1093
- # Proceed with actual request
1094
- async with session.post(
1095
- endpoint.url,
1096
- headers={
1097
- "Accept": "application/json",
1098
- "Authorization": f"Bearer {HF_TOKEN}",
1099
- "Content-Type": "application/json",
1100
- "X-Request-ID": request_id # Add request ID to headers
1101
- },
1102
- json=json_payload,
1103
- timeout=12 # Extended timeout for thumbnails (was 8s)
1104
- ) as response:
1105
- request_duration = time.time() - start_time
1106
- #logger.info(f"[{request_id}] Received response from endpoint {endpoint.id} in {request_duration:.2f}s: HTTP {response.status}")
1107
-
1108
- if response.status != 200:
1109
- error_text = await response.text()
1110
- logger.error(f"[{request_id}] Failed response: {error_text}")
1111
- # Mark endpoint as in error state
1112
- await self._mark_endpoint_error(endpoint)
1113
- if "paused" in error_text:
1114
- logger.error(f"[{request_id}] Endpoint is paused")
1115
- return ""
1116
- raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
1117
-
1118
- result = await response.json()
1119
- #logger.info(f"[{request_id}] Successfully parsed JSON response")
1120
-
1121
- if "error" in result:
1122
- error_msg = result['error']
1123
- logger.error(f"[{request_id}] Error in response: {error_msg}")
1124
- # Mark endpoint as in error state
1125
- await self._mark_endpoint_error(endpoint)
1126
- if "paused" in str(error_msg).lower():
1127
- logger.error(f"[{request_id}] Endpoint is paused")
1128
- return ""
1129
- raise Exception(f"Video generation failed: {error_msg}")
1130
-
1131
- video_data_uri = result.get("video")
1132
- if not video_data_uri:
1133
- logger.error(f"[{request_id}] No video data in response")
1134
- # Mark endpoint as in error state
1135
- await self._mark_endpoint_error(endpoint)
1136
- raise Exception("No video data in response")
1137
-
1138
- # Get data size
1139
- data_size = len(video_data_uri)
1140
- #logger.info(f"[{request_id}] Received video data: {data_size} chars")
1141
-
1142
- # Reset error count on successful call
1143
- endpoint.error_count = 0
1144
- endpoint.error_until = 0
1145
-
1146
- return video_data_uri
1147
-
1148
- except asyncio.TimeoutError:
1149
- # Handle timeout specifically
1150
- logger.error(f"[{request_id}] Timeout occurred after {time.time() - start_time:.2f}s")
1151
- await self._mark_endpoint_error(endpoint, is_timeout=True)
1152
- return ""
1153
- except Exception as e:
1154
- # Handle all other exceptions
1155
- logger.error(f"[{request_id}] Exception during video generation: {str(e)}")
1156
- if not isinstance(e, asyncio.TimeoutError): # Already handled above
1157
- await self._mark_endpoint_error(endpoint)
1158
- return ""
1159
-
1160
- async def _generate_video_content_with_gradio(self, prompt: str, negative_prompt: str, width: int,
1161
- height: int, num_frames: int, num_inference_steps: int,
1162
- frame_rate: int, seed: int, options: dict, user_role: UserRole) -> str:
1163
- """
1164
- Internal method to generate video content with specific parameters.
1165
- Used by both regular video generation and thumbnail generation.
1166
- This version use our generic gradio space.
1167
- """
1168
- is_thumbnail = options.get('thumbnail', False)
1169
- request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
1170
- video_id = options.get('video_id', 'unknown')
1171
-
1172
- # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
1173
-
1174
- # Define the synchronous function
1175
- def _sync_gradio_call():
1176
- client = Client("jbilcke-hf/fast-rendering-node", hf_token=HF_TOKEN)
1177
-
1178
- return client.predict(
1179
- prompt=prompt,
1180
- seed=seed,
1181
- fps=8, # frame_rate, # attention, right now tikslop asks for 25 FPS
1182
- width=640, # width, # attention, right now tikslop asks for 1152
1183
- height=320, # height, # attention, right now tikslop asks for 640
1184
- duration=3, # num_frames // frame_rate
1185
- )
1186
-
1187
- # Run in a thread using asyncio.to_thread (Python 3.9+)
1188
- video_data_uri = await asyncio.to_thread(_sync_gradio_call)
1189
-
1190
- return video_data_uri
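The Gradio path above relies on pushing a blocking SDK call off the event loop. A minimal, runnable sketch of that pattern, with a placeholder standing in for the real gradio_client call:

import asyncio
import time

def blocking_render(prompt: str) -> str:
    # stand-in for the real gradio_client.Client(...).predict(...) call
    time.sleep(0.5)
    return f"data:video/mp4;base64,...  # rendered for: {prompt}"

async def main() -> None:
    # asyncio.to_thread (Python 3.9+) runs the blocking call in a worker thread,
    # so other websocket sessions keep being served while the clip renders.
    uri = await asyncio.to_thread(blocking_render, "a park at dawn, documentary footage")
    print(uri)

asyncio.run(main())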
1191
-
1192
- async def _mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
1193
- """Mark an endpoint as being in error state with exponential backoff"""
1194
- async with self.endpoint_manager.lock:
1195
- endpoint.error_count += 1
1196
-
1197
- # Calculate backoff time exponentially based on error count
1198
- # Start with 15 seconds, then 30, 60, etc. up to a max of 5 minutes
1199
- # Using shorter backoffs since generation should be fast
1200
- backoff_seconds = min(15 * (2 ** (endpoint.error_count - 1)), 300)
1201
-
1202
- # Add extra backoff for timeouts which are more indicative of serious issues
1203
- if is_timeout:
1204
- backoff_seconds *= 2
1205
-
1206
- endpoint.error_until = time.time() + backoff_seconds
1207
-
1208
- logger.warning(
1209
- f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
1210
- f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
1211
- )
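For reference, the backoff schedule implemented above works out as follows; a worked example using the same constants (15 s base, doubling per consecutive error, capped at 300 s, doubled once more for timeouts):

def backoff_seconds(error_count: int, is_timeout: bool = False) -> int:
    seconds = min(15 * (2 ** (error_count - 1)), 300)
    return seconds * 2 if is_timeout else seconds

print([backoff_seconds(n) for n in range(1, 7)])  # [15, 30, 60, 120, 240, 300]
print(backoff_seconds(3, is_timeout=True))        # 120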
1212
-
1213
 
1214
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
1215
  """Process and broadcast a chat message"""
1216
  video_id = data.get('videoId')
1217
- request_id = data.get('requestId')
1218
 
1219
- if not video_id:
1220
- return {
1221
- 'action': 'chat_message',
1222
- 'requestId': request_id,
1223
- 'success': False,
1224
- 'error': 'No video ID provided'
1225
- }
1226
-
1227
  # Add chat message to event history
1228
- self._add_event(video_id, {
1229
- "time": datetime.datetime.utcnow().isoformat() + "Z",
1230
- "event": "new_chat_message",
1231
- "username": data.get('username', 'Anonymous'),
1232
- "data": data.get('content', '')
1233
- })
1234
-
1235
- room = self.chat_rooms[video_id]
1236
- message_data = {k: v for k, v in data.items() if k != '_ws'}
1237
- room.add_message(message_data)
1238
-
1239
- for client in room.connected_clients:
1240
- if client != ws:
1241
- try:
1242
- await client.send_json({
1243
- 'action': 'chat_message',
1244
- 'broadcast': True,
1245
- **message_data
1246
- })
1247
- except Exception as e:
1248
- logger.error(f"Failed to broadcast to client: {e}")
1249
- room.connected_clients.remove(client)
1250
 
1251
- return {
1252
- 'action': 'chat_message',
1253
- 'requestId': request_id,
1254
- 'success': True,
1255
- 'message': message_data
1256
- }
1257
 
1258
  async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
1259
  """Handle a request to join a chat room"""
1260
- video_id = data.get('videoId')
1261
- request_id = data.get('requestId')
1262
-
1263
- if not video_id:
1264
- return {
1265
- 'action': 'join_chat',
1266
- 'requestId': request_id,
1267
- 'success': False,
1268
- 'error': 'No video ID provided'
1269
- }
1270
-
1271
- room = self.chat_rooms[video_id]
1272
- room.connected_clients.add(ws)
1273
- recent_messages = room.get_recent_messages()
1274
-
1275
- return {
1276
- 'action': 'join_chat',
1277
- 'requestId': request_id,
1278
- 'success': True,
1279
- 'messages': recent_messages
1280
- }
1281
 
1282
  async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
1283
  """Handle a request to leave a chat room"""
1284
- video_id = data.get('videoId')
1285
- request_id = data.get('requestId')
1286
-
1287
- if not video_id:
1288
- return {
1289
- 'action': 'leave_chat',
1290
- 'requestId': request_id,
1291
- 'success': False,
1292
- 'error': 'No video ID provided'
1293
- }
1294
-
1295
- room = self.chat_rooms[video_id]
1296
- if ws in room.connected_clients:
1297
- room.connected_clients.remove(ws)
1298
-
1299
- return {
1300
- 'action': 'leave_chat',
1301
- 'requestId': request_id,
1302
- 'success': True
1303
- }
 
4
  import re
5
  import base64
6
  import uuid
7
+ from typing import Dict, Any, Optional, List
 
 
8
  import asyncio
9
  import time
10
  import datetime
 
11
  from collections import defaultdict
12
  from aiohttp import web, ClientSession
13
+ from huggingface_hub import HfApi
14
  from gradio_client import Client
15
  import random
16
  import yaml
17
  import json
18
 
19
+ from .api_config import *
20
+ from .models import UserRole
21
+ from .endpoint_manager import EndpointManager
22
+ from .utils import generate_seed, sanitize_yaml_response
23
+ from .chat import ChatManager
24
+ from .config_utils import get_config_value
25
+ from .video_utils import (
26
+ generate_video_content_with_inference_endpoints,
27
+ generate_video_content_with_gradio
28
+ )
29
+ from .llm_utils import (
30
+ get_inference_client,
31
+ generate_text,
32
+ SEARCH_VIDEO_PROMPT_TEMPLATE,
33
+ GENERATE_CAPTION_PROMPT_TEMPLATE,
34
+ SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE,
35
+ SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE,
36
+ GENERATE_CLIP_PROMPT_TEMPLATE
37
  )
 
 
 
 
 
 
38
 
39
+ # Configure logging
40
+ from .logging_utils import get_logger
41
+ logger = get_logger(__name__)
 
 
42
 
 
 
 
 
43
 
 
 
44
 
45
  class VideoGenerationAPI:
46
  def __init__(self):
47
  self.hf_api = HfApi(token=HF_TOKEN)
48
  self.endpoint_manager = EndpointManager()
49
  self.active_requests: Dict[str, asyncio.Future] = {}
50
+ self.chat_manager = ChatManager()
51
  self.video_events: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
52
  self.event_history_limit = 50
53
  # Cache for user roles to avoid repeated API calls
54
  self.user_role_cache: Dict[str, Dict[str, Any]] = {}
55
  # Cache expiration time (10 minutes)
56
  self.cache_expiration = 600
 
 
 
 
 
 
57
 
58
  def _add_event(self, video_id: str, event: Dict[str, Any]):
59
  """Add an event to the video's history and maintain the size limit"""
 
150
  temperature = random.uniform(0.68, 0.72)
151
 
152
  while current_attempt <= max_attempts:
153
+ prompt = SEARCH_VIDEO_PROMPT_TEMPLATE.format(
154
+ current_attempt=current_attempt,
155
+ query=query
156
+ )
 
 
 
 
 
 
157
 
158
  try:
159
+ raw_yaml_str = await generate_text(
160
  prompt,
161
  llm_config=llm_config,
162
  max_new_tokens=200,
 
167
 
168
  #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
169
 
170
+ # All pre-processing is now handled in sanitize_yaml_response
 
 
 
 
 
 
171
  sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
172
 
173
  try:
 
248
  async def generate_caption(self, title: str, description: str, llm_config: Optional[dict] = None) -> str:
249
  """Generate detailed caption using HF text generation"""
250
  try:
251
+ prompt = GENERATE_CAPTION_PROMPT_TEMPLATE.format(
252
+ title=title,
253
+ description=description
254
+ )
 
255
 
256
+ response = await generate_text(
257
  prompt,
258
  llm_config=llm_config,
259
  max_new_tokens=180,
 
300
  # Create an appropriate prompt based on whether this is the first simulation
301
  chat_section = ""
302
  if chat_messages:
303
+ logger.info(f"CHAT_DEBUG: Server received chat messages for simulation: {chat_messages}")
304
  chat_section = f"""
305
  People are watching this content right now and have shared their thoughts. Like a game master, please take their feedback as input to adjust the story and/or the scene. Here are their messages:
306
 
307
  {chat_messages}
308
  """
309
+ else:
310
+ logger.info("CHAT_DEBUG: Server simulation called with no chat messages")
311
 
312
  if is_first_simulation:
313
+ prompt = SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE.format(
314
+ original_title=original_title,
315
+ original_description=original_description,
316
+ chat_section=chat_section
317
+ )
 
 
 
 
 
 
318
  else:
319
+ prompt = SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE.format(
320
+ original_title=original_title,
321
+ original_description=original_description,
322
+ condensed_history=condensed_history,
323
+ current_description=current_description,
324
+ chat_section=chat_section
325
+ )
 
 
 
 
 
 
 
326
 
327
  # Generate the evolved description using the helper method
328
+ response = await generate_text(
329
  prompt,
330
  llm_config=llm_config,
331
  max_new_tokens=240,
 
355
  "condensed_history": condensed_history
356
  }
357
 
 
 
 
 
 
358
  async def _generate_clip_prompt(self, video_id: str, title: str, description: str) -> str:
359
  """Generate a new prompt for the next clip based on event history"""
360
  events = self.video_events.get(video_id, [])
361
  events_json = "\n".join(json.dumps(event) for event in events)
362
 
363
+ prompt = GENERATE_CLIP_PROMPT_TEMPLATE.format(
364
+ title=title,
365
+ description=description,
366
+ event_count=len(events),
367
+ events_json=events_json
368
+ )
 
 
 
 
 
 
369
 
370
  try:
371
+ # Use the imported generate_text function instead
372
+ response = await generate_text(
373
+ prompt,
374
+ llm_config=None, # Use default config
375
+ max_new_tokens=200,
376
+ temperature=0.7
 
 
377
  )
378
 
379
  # Clean up the response
 
449
 
450
  start_time = time.time()
451
  # Rest of thumbnail generation logic same as regular video but with optimized settings
452
+ result = await generate_video_content_with_inference_endpoints(
453
+ self.endpoint_manager,
454
  prompt=prompt,
455
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
456
  width=width,
 
498
  prompt = f"{clip_caption}, {POSITIVE_PROMPT_SUFFIX}"
499
 
500
  # Get the config values based on user role
501
+ width = get_config_value(user_role, 'clip_width', options)
502
+ height = get_config_value(user_role, 'clip_height', options)
503
+ num_frames = get_config_value(user_role, 'num_frames', options)
504
+ num_inference_steps = get_config_value(user_role, 'num_inference_steps', options)
505
+ frame_rate = get_config_value(user_role, 'clip_framerate', options)
506
 
507
  # Get orientation from options
508
  orientation = options.get('orientation', 'LANDSCAPE')
 
523
  # Generate the video with standard settings
524
  # historically we used _generate_video_content_with_inference_endpoints,
525
  # which offers better performance and reliability, but costs were spinning out of control
526
+ return await generate_video_content_with_inference_endpoints(
527
+ self.endpoint_manager,
528
  prompt=prompt,
529
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
530
  width=width,
 
536
  options=options,
537
  user_role=user_role
538
  )
 
 
 
 
 
 
539
 
540
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
541
  """Process and broadcast a chat message"""
542
  video_id = data.get('videoId')
 
543
 
 
 
 
 
 
 
 
 
544
  # Add chat message to event history
545
+ if video_id:
546
+ self._add_event(video_id, {
547
+ "time": datetime.datetime.utcnow().isoformat() + "Z",
548
+ "event": "new_chat_message",
549
+ "username": data.get('username', 'Anonymous'),
550
+ "data": data.get('content', '')
551
+ })
 
 
 
 
 
 
 
552
 
553
+ return await self.chat_manager.handle_chat_message(data, ws)
 
 
 
 
 
554
 
555
  async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
556
  """Handle a request to join a chat room"""
557
+ return await self.chat_manager.handle_join_chat(data, ws)
 
 
 
 
 
 
558
 
559
  async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
560
  """Handle a request to leave a chat room"""
561
+ return await self.chat_manager.handle_leave_chat(data, ws)
 
 
 
 
 
 
api_metrics.py → server/api_metrics.py RENAMED
File without changes
api_session.py → server/api_session.py RENAMED
@@ -5,9 +5,10 @@ from aiohttp import web, WSMsgType
5
  import json
6
  import time
7
  import datetime
8
- from api_core import VideoGenerationAPI
 
9
 
10
- logger = logging.getLogger(__name__)
11
 
12
  class UserSession:
13
  """
@@ -50,13 +51,14 @@ class UserSession:
50
  async def start(self):
51
  """Start all the queue processors for this session"""
52
  # Start background tasks for handling different request types
 
53
  self.background_tasks = [
54
  asyncio.create_task(self._process_chat_queue()),
55
  asyncio.create_task(self._process_video_queue()),
56
  asyncio.create_task(self._process_search_queue()),
57
  asyncio.create_task(self._process_simulation_queue()) # New worker for simulation requests
58
  ]
59
- logger.info(f"Started session for user {self.user_id} with role {self.user_role}")
60
 
61
  async def stop(self):
62
  """Stop all background tasks for this session"""
@@ -114,88 +116,127 @@ class UserSession:
114
 
115
  async def _process_video_queue(self):
116
  """Process multiple video generation requests in parallel for this user"""
117
- from api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
118
-
119
- active_tasks = set()
120
- # Set a per-user concurrent limit based on role
121
- max_concurrent = len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
122
- if self.user_role == 'anon':
123
- max_concurrent = min(2, max_concurrent) # Limit anonymous users
124
- elif self.user_role == 'normal':
125
- max_concurrent = min(4, max_concurrent) # Standard users
126
- # Pro and admin can use all endpoints
127
-
128
- async def process_single_request(data):
129
  try:
130
- title = data.get('title', '')
131
- description = data.get('description', '')
132
- video_prompt_prefix = data.get('video_prompt_prefix', '')
133
- options = data.get('options', {})
134
-
135
- # Pass the user role to generate_video
136
- video_data = await self.shared_api.generate_video(
137
- title, description, video_prompt_prefix, options, self.user_role
138
- )
139
-
140
- result = {
141
- 'action': 'generate_video',
142
- 'requestId': data.get('requestId'),
143
- 'success': True,
144
- 'video': video_data,
145
- }
146
-
147
- await self.ws.send_json(result)
148
-
149
- # Update metrics
150
- self.request_counts['video'] += 1
151
- self.last_request_times['video'] = time.time()
152
-
153
- except Exception as e:
154
- logger.error(f"Error processing video request for user {self.user_id}: {e}")
155
  try:
156
- await self.ws.send_json({
157
- 'action': 'generate_video',
158
- 'requestId': data.get('requestId'),
159
- 'success': False,
160
- 'error': f'Video generation error: {str(e)}'
161
- })
162
- except Exception as send_error:
163
- logger.error(f"Error sending error response: {send_error}")
164
- finally:
165
- active_tasks.discard(asyncio.current_task())
166
-
167
- while True:
168
- # Clean up completed tasks
169
- active_tasks = {task for task in active_tasks if not task.done()}
170
 
171
- # Start new tasks if we have capacity
172
- while len(active_tasks) < max_concurrent:
 
 
 
 
 
 
 
 
 
173
  try:
174
- # Use try_get to avoid blocking if queue is empty
175
- data = await asyncio.wait_for(self.video_queue.get(), timeout=0.1)
 
 
 
 
 
176
 
177
- # Create and start new task
178
- task = asyncio.create_task(process_single_request(data))
179
- active_tasks.add(task)
 
 
 
 
 
 
180
 
181
- except asyncio.TimeoutError:
182
- # No items in queue, break inner loop
183
- break
184
  except Exception as e:
185
- logger.error(f"Error creating video generation task for user {self.user_id}: {e}")
186
- break
187
-
188
- # Wait a short time before checking queue again
189
- await asyncio.sleep(0.1)
 
 
 
 
 
 
 
 
190
 
191
- # Handle any completed tasks' errors
192
- for task in list(active_tasks):
193
- if task.done():
 
 
 
 
 
 
194
  try:
195
- await task
 
 
 
 
 
 
 
 
 
 
 
 
196
  except Exception as e:
197
- logger.error(f"Task failed with error for user {self.user_id}: {e}")
198
- active_tasks.discard(task)
 
 
 
 
 
 
199
 
200
  async def _process_search_queue(self):
201
  """Medium priority queue for search operations"""
 
5
  import json
6
  import time
7
  import datetime
8
+ from .api_core import VideoGenerationAPI
9
+ from .logging_utils import get_logger
10
 
11
+ logger = get_logger(__name__)
12
 
13
  class UserSession:
14
  """
 
51
  async def start(self):
52
  """Start all the queue processors for this session"""
53
  # Start background tasks for handling different request types
54
+ logger.info(f"Creating background tasks for user {self.user_id}")
55
  self.background_tasks = [
56
  asyncio.create_task(self._process_chat_queue()),
57
  asyncio.create_task(self._process_video_queue()),
58
  asyncio.create_task(self._process_search_queue()),
59
  asyncio.create_task(self._process_simulation_queue()) # New worker for simulation requests
60
  ]
61
+ logger.info(f"Started session for user {self.user_id} with role {self.user_role}, created {len(self.background_tasks)} background tasks")
62
 
63
  async def stop(self):
64
  """Stop all background tasks for this session"""
 
116
 
117
  async def _process_video_queue(self):
118
  """Process multiple video generation requests in parallel for this user"""
119
+ try:
 
 
 
 
 
 
 
 
 
 
 
120
  try:
121
+ from .api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
122
+ except ImportError:
 
 
 
 
 
 
123
  try:
124
+ from server.api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
125
+ except ImportError:
126
+ logger.error(f"Failed to import VIDEO_ROUND_ROBIN_ENDPOINT_URLS for user {self.user_id}")
127
+ return
 
 
 
 
 
 
 
 
 
 
128
 
129
+ active_tasks = set()
130
+ # Set a per-user concurrent limit based on role
131
+ max_concurrent = len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
132
+ if self.user_role == 'anon':
133
+ max_concurrent = min(2, max_concurrent) # Limit anonymous users
134
+ elif self.user_role == 'normal':
135
+ max_concurrent = min(4, max_concurrent) # Standard users
136
+ # Pro and admin can use all endpoints
137
+
138
+ async def process_single_request(data):
139
+ request_id = data.get('requestId', 'unknown')
140
  try:
141
+ title = data.get('title', '')
142
+ description = data.get('description', '')
143
+ video_prompt_prefix = data.get('video_prompt_prefix', '')
144
+ options = data.get('options', {})
145
+
146
+ #logger.info(f"Starting video generation for user {self.user_id}: title='{title[:50]}...', role={self.user_role}")
147
+ start_time = time.time()
148
 
149
+ # Pass the user role to generate_video
150
+ video_data = await self.shared_api.generate_video(
151
+ title, description, video_prompt_prefix, options, self.user_role
152
+ )
153
+
154
+ generation_time = time.time() - start_time
155
+ logger.info(f"generated clip in {generation_time:.2f}s (len: {len(video_data) if video_data else 0})")
156
+
157
+ result = {
158
+ 'action': 'generate_video',
159
+ 'requestId': data.get('requestId'),
160
+ 'success': True,
161
+ 'video': video_data,
162
+ }
163
+
164
+ #logger.info(f"Sending video generation response to user {self.user_id}")
165
+ await self.ws.send_json(result)
166
+
167
+ # Update metrics
168
+ self.request_counts['video'] += 1
169
+ self.last_request_times['video'] = time.time()
170
 
 
 
 
171
  except Exception as e:
172
+ logger.error(f"Error processing video request for user {self.user_id}: {e}")
173
+ try:
174
+ logger.info(f"Sending error response to user {self.user_id}")
175
+ await self.ws.send_json({
176
+ 'action': 'generate_video',
177
+ 'requestId': data.get('requestId'),
178
+ 'success': False,
179
+ 'error': f'Video generation error: {str(e)}'
180
+ })
181
+ except Exception as send_error:
182
+ logger.error(f"Error sending error response: {send_error}")
183
+ finally:
184
+ active_tasks.discard(asyncio.current_task())
185
 
186
+ logger.info(f"Video queue processor started for user {self.user_id} with max_concurrent={max_concurrent}")
187
+
188
+ while True:
189
+ # Clean up completed tasks
190
+ active_tasks = {task for task in active_tasks if not task.done()}
191
+
192
+ # Log queue processing activity every few iterations
193
+ if hasattr(self, '_queue_debug_counter'):
194
+ self._queue_debug_counter += 1
195
+ else:
196
+ self._queue_debug_counter = 1
197
+
198
+ if self._queue_debug_counter % 50 == 0: # Log every 5 seconds (50 * 0.1s)
199
+ queue_size = self.video_queue.qsize()
200
+ # let's hide this log, it is too verbose
201
+ #logger.info(f"Video queue processor heartbeat for user {self.user_id}: queue_size={queue_size}, active_tasks={len(active_tasks)}/{max_concurrent}")
202
+
203
+ # Start new tasks if we have capacity
204
+ while len(active_tasks) < max_concurrent:
205
  try:
206
+ # Use try_get to avoid blocking if queue is empty
207
+ data = await asyncio.wait_for(self.video_queue.get(), timeout=0.1)
208
+
209
+ request_id = data.get('requestId', 'unknown')
210
+ #logger.info(f"[{request_id}] Picked up video request from queue for user {self.user_id}, creating task (active: {len(active_tasks)}/{max_concurrent})")
211
+
212
+ # Create and start new task
213
+ task = asyncio.create_task(process_single_request(data))
214
+ active_tasks.add(task)
215
+
216
+ except asyncio.TimeoutError:
217
+ # No items in queue, break inner loop
218
+ break
219
  except Exception as e:
220
+ logger.error(f"Error creating video generation task for user {self.user_id}: {e}")
221
+ break
222
+
223
+ # Wait a short time before checking queue again
224
+ await asyncio.sleep(0.1)
225
+
226
+ # Handle any completed tasks' errors
227
+ for task in list(active_tasks):
228
+ if task.done():
229
+ try:
230
+ await task
231
+ except Exception as e:
232
+ logger.error(f"Task failed with error for user {self.user_id}: {e}")
233
+ active_tasks.discard(task)
234
+
235
+ except Exception as e:
236
+ logger.error(f"Video queue processor crashed for user {self.user_id}: {e}")
237
+ import traceback
238
+ logger.error(f"Video queue processor traceback: {traceback.format_exc()}")
239
+ raise # Re-raise to ensure the error is visible
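Stripped of logging and session state, the loop above is a bounded-concurrency queue drain. A self-contained sketch of that pattern (names are illustrative; the real worker uses the per-role max_concurrent computed earlier):

import asyncio

async def drain(queue: asyncio.Queue, handler, max_concurrent: int = 2) -> None:
    active: set = set()
    while True:
        active = {t for t in active if not t.done()}
        while len(active) < max_concurrent:
            try:
                item = await asyncio.wait_for(queue.get(), timeout=0.1)
            except asyncio.TimeoutError:
                break  # queue momentarily empty
            active.add(asyncio.create_task(handler(item)))
        await asyncio.sleep(0.1)  # breathe before the next pass

async def demo() -> None:
    q: asyncio.Queue = asyncio.Queue()
    for i in range(3):
        q.put_nowait(i)

    async def handle(i: int) -> None:
        await asyncio.sleep(0.2)
        print("done", i)

    worker = asyncio.create_task(drain(q, handle))
    await asyncio.sleep(1)
    worker.cancel()
    try:
        await worker
    except asyncio.CancelledError:
        pass

asyncio.run(demo())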
240
 
241
  async def _process_search_queue(self):
242
  """Medium priority queue for search operations"""
server/chat.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
1
+ """
2
+ Chat-related functionality for video interactions.
3
+ """
4
+ import datetime
5
+ import logging
6
+ from collections import defaultdict
7
+ from typing import Dict, List, Any
8
+ from aiohttp import web
9
+ from .models import ChatRoom
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class ChatManager:
15
+ """Manages multiple chat rooms for different videos."""
16
+
17
+ def __init__(self):
18
+ self.chat_rooms = defaultdict(ChatRoom)
19
+
20
+ async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
21
+ """Process and broadcast a chat message"""
22
+ video_id = data.get('videoId')
23
+ request_id = data.get('requestId')
24
+
25
+ if not video_id:
26
+ return {
27
+ 'action': 'chat_message',
28
+ 'requestId': request_id,
29
+ 'success': False,
30
+ 'error': 'No video ID provided'
31
+ }
32
+
33
+ room = self.chat_rooms[video_id]
34
+ message_data = {k: v for k, v in data.items() if k != '_ws'}
35
+ room.add_message(message_data)
36
+
37
+ for client in room.connected_clients:
38
+ if client != ws:
39
+ try:
40
+ await client.send_json({
41
+ 'action': 'chat_message',
42
+ 'broadcast': True,
43
+ **message_data
44
+ })
45
+ except Exception as e:
46
+ logger.error(f"Failed to broadcast to client: {e}")
47
+ room.connected_clients.remove(client)
48
+
49
+ return {
50
+ 'action': 'chat_message',
51
+ 'requestId': request_id,
52
+ 'success': True,
53
+ 'message': message_data
54
+ }
55
+
56
+ async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
57
+ """Handle a request to join a chat room"""
58
+ video_id = data.get('videoId')
59
+ request_id = data.get('requestId')
60
+
61
+ if not video_id:
62
+ return {
63
+ 'action': 'join_chat',
64
+ 'requestId': request_id,
65
+ 'success': False,
66
+ 'error': 'No video ID provided'
67
+ }
68
+
69
+ room = self.chat_rooms[video_id]
70
+ room.connected_clients.add(ws)
71
+ recent_messages = room.get_recent_messages()
72
+
73
+ return {
74
+ 'action': 'join_chat',
75
+ 'requestId': request_id,
76
+ 'success': True,
77
+ 'messages': recent_messages
78
+ }
79
+
80
+ async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
81
+ """Handle a request to leave a chat room"""
82
+ video_id = data.get('videoId')
83
+ request_id = data.get('requestId')
84
+
85
+ if not video_id:
86
+ return {
87
+ 'action': 'leave_chat',
88
+ 'requestId': request_id,
89
+ 'success': False,
90
+ 'error': 'No video ID provided'
91
+ }
92
+
93
+ room = self.chat_rooms[video_id]
94
+ if ws in room.connected_clients:
95
+ room.connected_clients.remove(ws)
96
+
97
+ return {
98
+ 'action': 'leave_chat',
99
+ 'requestId': request_id,
100
+ 'success': True
101
+ }
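A quick way to see the new module in action is to drive it with a fake socket. This sketch assumes the server/ package from this commit is importable and only illustrates the join/broadcast flow; FakeWebSocket is a made-up stand-in for aiohttp's WebSocketResponse:

import asyncio
from server.chat import ChatManager

class FakeWebSocket:
    async def send_json(self, payload):
        print("broadcast:", payload)

async def main() -> None:
    manager = ChatManager()
    viewer_a, viewer_b = FakeWebSocket(), FakeWebSocket()

    # Both viewers join the same room, keyed by video id.
    await manager.handle_join_chat({'videoId': 'vid-1', 'requestId': 'r1'}, viewer_a)
    await manager.handle_join_chat({'videoId': 'vid-1', 'requestId': 'r2'}, viewer_b)

    # A message from viewer A is stored and broadcast to viewer B only.
    result = await manager.handle_chat_message(
        {'videoId': 'vid-1', 'requestId': 'r3', 'username': 'demo', 'content': 'hi'},
        viewer_a,
    )
    print(result['success'], len(manager.chat_rooms['vid-1'].messages))

asyncio.run(main())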
server/config_utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
1
+ """
2
+ Configuration utilities for user role-based settings.
3
+ """
4
+ from typing import Any, Dict, Optional
5
+ from .models import UserRole
6
+ from .api_config import (
7
+ CONFIG_FOR_ADMIN_HF_USERS,
8
+ CONFIG_FOR_PRO_HF_USERS,
9
+ CONFIG_FOR_STANDARD_HF_USERS,
10
+ CONFIG_FOR_ANONYMOUS_USERS
11
+ )
12
+
13
+
14
+ def get_config_value(role: UserRole, field: str, options: Optional[Dict[str, Any]] = None) -> Any:
15
+ """
16
+ Get the appropriate config value for a user role.
17
+
18
+ Args:
19
+ role: The user role ('anon', 'normal', 'pro', 'admin')
20
+ field: The config field name to retrieve
21
+ options: Optional user-provided options that may override defaults
22
+
23
+ Returns:
24
+ The config value appropriate for the user's role with respect to
25
+ min/max boundaries and user overrides.
26
+ """
27
+ # Select the appropriate config based on user role
28
+ if role == 'admin':
29
+ config = CONFIG_FOR_ADMIN_HF_USERS
30
+ elif role == 'pro':
31
+ config = CONFIG_FOR_PRO_HF_USERS
32
+ elif role == 'normal':
33
+ config = CONFIG_FOR_STANDARD_HF_USERS
34
+ else: # Anonymous users
35
+ config = CONFIG_FOR_ANONYMOUS_USERS
36
+
37
+ # Get the default value for this field from the config
38
+ default_value = config.get(f"default_{field}", None)
39
+
40
+ # For fields that have min/max bounds
41
+ min_field = f"min_{field}"
42
+ max_field = f"max_{field}"
43
+
44
+ # Check if min/max constraints exist for this field
45
+ has_constraints = min_field in config or max_field in config
46
+
47
+ if not has_constraints:
48
+ # For fields without constraints, just return the value from config
49
+ return default_value
50
+
51
+ # Get min and max values from config (if they exist)
52
+ min_value = config.get(min_field, None)
53
+ max_value = config.get(max_field, None)
54
+
55
+ # If user provided options with this field
56
+ if options and field in options:
57
+ user_value = options[field]
58
+
59
+ # Apply constraints if they exist
60
+ if min_value is not None and user_value < min_value:
61
+ return min_value
62
+ if max_value is not None and user_value > max_value:
63
+ return max_value
64
+
65
+ # If within bounds, use the user's value
66
+ return user_value
67
+
68
+ # If no user value, return the default
69
+ return default_value
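A minimal usage sketch of get_config_value; it assumes the role configs in api_config define default_num_frames / min_num_frames / max_num_frames keys, and the field name num_frames is only an illustration:

from server.config_utils import get_config_value

# User asks for more frames than an anonymous account allows: the value is
# clamped to max_num_frames for the 'anon' role (if that key exists).
clamped = get_config_value('anon', 'num_frames', options={'num_frames': 10_000})

# No user override: returns default_num_frames for the 'pro' role, or None.
default = get_config_value('pro', 'num_frames')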
server/endpoint_manager.py ADDED
@@ -0,0 +1,109 @@
1
+ """
2
+ Endpoint management for video generation services.
3
+ """
4
+ import time
5
+ import datetime
6
+ import logging
7
+ from asyncio import Lock
8
+ from contextlib import asynccontextmanager
9
+ from typing import List
10
+ from .models import Endpoint
11
+ from .api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class EndpointManager:
17
+ """Manages multiple video generation endpoints with load balancing and error handling."""
18
+
19
+ def __init__(self):
20
+ self.endpoints: List[Endpoint] = []
21
+ self.lock = Lock()
22
+ self.initialize_endpoints()
23
+ self.last_used_index = -1 # Track the last used endpoint for round-robin
24
+
25
+ def initialize_endpoints(self):
26
+ """Initialize the list of endpoints"""
27
+ for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
28
+ endpoint = Endpoint(id=i + 1, url=url)
29
+ self.endpoints.append(endpoint)
30
+
31
+ def _get_next_free_endpoint(self):
32
+ """Get the next available non-busy endpoint, or oldest endpoint if all are busy"""
33
+ current_time = time.time()
34
+
35
+ # First priority: Get any non-busy and non-error endpoint
36
+ free_endpoints = [
37
+ ep for ep in self.endpoints
38
+ if not ep.busy and current_time > ep.error_until
39
+ ]
40
+
41
+ if free_endpoints:
42
+ # Return the least recently used free endpoint
43
+ return min(free_endpoints, key=lambda ep: ep.last_used)
44
+
45
+ # Second priority: If all busy/error, use round-robin but skip error endpoints
46
+ tried_count = 0
47
+ next_index = self.last_used_index
48
+
49
+ while tried_count < len(self.endpoints):
50
+ next_index = (next_index + 1) % len(self.endpoints)
51
+ tried_count += 1
52
+
53
+ # If endpoint is not in error state, use it
54
+ if current_time > self.endpoints[next_index].error_until:
55
+ self.last_used_index = next_index
56
+ return self.endpoints[next_index]
57
+
58
+ # If all endpoints are in error state, use the one with earliest error expiry
59
+ self.last_used_index = next_index
60
+ return min(self.endpoints, key=lambda ep: ep.error_until)
61
+
62
+ @asynccontextmanager
63
+ async def get_endpoint(self, max_wait_time: int = 10):
64
+ """Get the next available endpoint using a context manager"""
65
+ start_time = time.time()
66
+ endpoint = None
67
+
68
+ try:
69
+ while True:
70
+ if time.time() - start_time > max_wait_time:
71
+ raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
72
+
73
+ async with self.lock:
74
+ # Get the next available endpoint using our selection strategy
75
+ endpoint = self._get_next_free_endpoint()
76
+
77
+ # Mark it as busy
78
+ endpoint.busy = True
79
+ endpoint.last_used = time.time()
80
+ break
81
+
82
+ yield endpoint
83
+
84
+ finally:
85
+ if endpoint:
86
+ async with self.lock:
87
+ endpoint.busy = False
88
+ endpoint.last_used = time.time()
89
+
90
+ async def mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
91
+ """Mark an endpoint as being in error state with exponential backoff"""
92
+ async with self.lock:
93
+ endpoint.error_count += 1
94
+
95
+ # Calculate backoff time exponentially based on error count
96
+ # Start with 15 seconds, then 30, 60, etc. up to a max of 5 minutes
97
+ # Using shorter backoffs since generation should be fast
98
+ backoff_seconds = min(15 * (2 ** (endpoint.error_count - 1)), 300)
99
+
100
+ # Add extra backoff for timeouts which are more indicative of serious issues
101
+ if is_timeout:
102
+ backoff_seconds *= 2
103
+
104
+ endpoint.error_until = time.time() + backoff_seconds
105
+
106
+ logger.warning(
107
+ f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
108
+ f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
109
+ )
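A hedged usage sketch of the manager; it assumes VIDEO_ROUND_ROBIN_ENDPOINT_URLS contains at least one URL, and it only prints instead of performing a real HTTP call:

import asyncio
from server.endpoint_manager import EndpointManager

async def render_once() -> None:
    manager = EndpointManager()
    try:
        # Acquires the least recently used free endpoint; released automatically on exit.
        async with manager.get_endpoint(max_wait_time=10) as endpoint:
            print(f"would POST to endpoint {endpoint.id}: {endpoint.url}")
            # On a failed call you would apply exponential backoff:
            # await manager.mark_endpoint_error(endpoint, is_timeout=False)
    except TimeoutError:
        print("no endpoint became available in time")

asyncio.run(render_once())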
server/llm_utils.py ADDED
@@ -0,0 +1,297 @@
1
+ """
2
+ LLM-related utilities, templates, and text generation functions.
3
+ """
4
+ import asyncio
5
+ import logging
6
+ from typing import Optional, Dict, Any
7
+ from huggingface_hub import InferenceClient
8
+ from .api_config import HF_TOKEN, TEXT_MODEL
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # LLM prompt templates
14
+ SEARCH_VIDEO_PROMPT_TEMPLATE = """# Instruction
15
+ Your response MUST be a YAML object containing a title and description, consistent with what we can find on a video sharing platform.
16
+ Format your YAML response with only those fields: "title" (a short string) and "description" (string caption of the scene). Do not add any other field.
17
+ In the description field, describe in a very synthetic way the visuals of the first shot (first scene), e.g. "<STYLE>, medium close-up shot, high angle view. In the foreground a <OPTIONAL AGE> <OPTIONAL GENDER> <CHARACTERS> <ACTIONS>. In the background <DESCRIBE LOCATION, BACKGROUND CHARACTERS, OBJECTS ETC>. The scene is lit by <LIGHTING> <WEATHER>". This is just an example! You MUST replace the <TAGS>!!
18
+ Don't forget to replace <STYLE> etc. with the actual fields!!
19
+ For the style, be creative, for instance you can use anything like a "documentary footage", "japanese animation", "movie scene", "tv series", "tv show", "security footage" etc.
20
+ If the user asks for something specific, e.g. "movie screencap", "movie scene", "documentary footage" or "animation", use that as the style.
21
+ Keep it minimalist but still descriptive: don't use bullet points, use simple words, and go to the essentials to describe the style (cinematic, documentary footage, 3D rendering..), camera modes and angles, characters, age, gender, action, location, lighting, country, costume, time, weather, textures, color palette, etc. Write about 80 words, and use between 2 and 3 sentences.
22
+ The most important part is to describe the actions and movements in the scene, so don't forget that!
23
+ Don't describe sound, so never say things like "atmospheric music playing in the background".
24
+ Instead, describe the visual elements we can see in the background; be precise (if there are cars, objects, people, bricks, birds, clouds, trees, leaves or grass, then say so).
25
+ Make the result unique and different from previous search results. ONLY RETURN YAML AND WITH ENGLISH CONTENT, NOT CHINESE - DO NOT ADD ANY OTHER COMMENT!
26
+
27
+ # Context
28
+ This is attempt {current_attempt}.
29
+
30
+ # Input
31
+ Describe the first scene/shot for: "{query}".
32
+
33
+ # Output
34
+
35
+ ```yaml
36
+ title: \""""
37
+
38
+ GENERATE_CAPTION_PROMPT_TEMPLATE = """Generate a detailed story for a video named: "{title}"
39
+ Visual description of the video: {description}.
40
+ Instructions: Write the story summary, including the plot, action, what should happen.
41
+ Make it around 200-300 words long.
42
+ A video can be anything from a tutorial, webcam, trailer, movie, live stream etc."""
43
+
44
+ SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE = """You are tasked with evolving the narrative for a video titled: "{original_title}"
45
+
46
+ Original description:
47
+ {original_description}
48
+ {chat_section}
49
+
50
+ Instructions:
51
+ 1. Imagine the next logical scene or development that would follow the current description.
52
+ 2. Consider the video context and recent events
53
+ 3. Create a natural progression from previous clips
54
+ 4. Take user suggestions (chat messages) into account when composing the scene
55
+ 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
56
+ 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
57
+ 7. Return ONLY the caption text, no additional formatting or explanation
58
+ 8. Write in English, about 200 words.
59
+ 9. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. from the previous description, when it makes sense).
60
+ 10. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
61
+ 11. Please write in the same style as the original description, by keeping things brief etc.
62
+
63
+ Remember to obey what users said in the chat history!!
64
+
65
+ Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
66
+
67
+ SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE = """You are tasked with continuing to evolve the narrative for a video titled: "{original_title}"
68
+
69
+ Original description:
70
+ {original_description}
71
+
72
+ Condensed history of scenes so far:
73
+ {condensed_history}
74
+
75
+ Current description (most recent scene):
76
+ {current_description}
77
+ {chat_section}
78
+
79
+ Instructions:
80
+ 1. Imagine the next logical scene or development that would follow the current description.
81
+ 2. Consider the video context and recent events
82
+ 3. Create a natural progression from previous clips
83
+ 4. Take user suggestions (chat messages) into account when composing the scene
84
+ 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
85
+ 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
86
+ 7. Return ONLY the caption text, no additional formatting or explanation
87
+ 8. Write in English, about 200 words.
88
+ 9. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. from the previous description, when it makes sense).
89
+ 10. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
90
+ 11. Please write in the same style as the original description, by keeping things brief etc.
91
+
92
+ Remember to obey what users said in the chat history!!
93
+
94
+ Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
95
+
96
+ GENERATE_CLIP_PROMPT_TEMPLATE = """# Context and task
97
+ Please write the caption for a new clip.
98
+
99
+ # Instructions
100
+ 1. Consider the video context and recent events
101
+ 2. Create a natural progression from previous clips
102
+ 3. Take user suggestions (chat messages) into account when composing the scene
103
+ 4. Don't generate hateful, political, violent or sexual content
104
+ 5. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
105
+ 6. Return ONLY the caption text, no additional formatting or explanation
106
+ 7. Write in English, about 200 words.
107
+ 8. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. across scenes, when it makes sense).
108
+ 9. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
109
+
110
+ # Examples
111
+ Here is a demo scenario, with fake data:
112
+ {{"time": "2024-11-29T13:36:15Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
113
+ {{"time": "2024-11-29T13:36:20Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "hi"}}
114
+ {{"time": "2024-11-29T13:36:25Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "more squirrels plz"}}
115
+ {{"time": "2024-11-29T13:36:26Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, a lot of squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
116
+
117
+ # Real scenario and data
118
+
119
+ We are inside a video titled "{title}"
120
+ The video is described by: "{description}".
121
+ Here is a summary of the {event_count} most recent events:
122
+ {events_json}
123
+
124
+ # Your response
125
+ Your caption:"""
126
+
127
+
128
+ def get_inference_client(llm_config: Optional[dict] = None) -> InferenceClient:
129
+ """
130
+ Get an InferenceClient configured with the provided LLM settings.
131
+
132
+ Priority order for API keys:
133
+ 1. Provider-specific API key (if provided)
134
+ 2. User's HF token (if provided)
135
+ 3. Server's HF token (only for built-in provider)
136
+ 4. Raise exception if no valid key is available
137
+ """
138
+
139
+ if not llm_config:
140
+ if HF_TOKEN:
141
+ return InferenceClient(
142
+ model=TEXT_MODEL,
143
+ token=HF_TOKEN
144
+ )
145
+ else:
146
+ raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
147
+
148
+ provider = llm_config.get('provider', '').lower()
149
+ #logger.info(f"provider = {provider}")
150
+
151
+ # If no provider or model specified, use default
152
+ if not provider or provider == 'built-in':
153
+ if HF_TOKEN:
154
+ return InferenceClient(
155
+ model=TEXT_MODEL,
156
+ token=HF_TOKEN
157
+ )
158
+ else:
159
+ raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
160
+
161
+ model = llm_config.get('model', '')
162
+ user_provider_api_key = llm_config.get('api_key', '') # Provider-specific API key
163
+ user_hf_token = llm_config.get('hf_token', '') # User's HF token
164
+
165
+ try:
166
+ # Case 1: Use a provider with a provider-specific API key if available
167
+ # This mode is currently hidden in the Flutter UI (we don't ask for provider-specific keys yet)
168
+ # but it is implemented here so that we don't forget it later
169
+ if user_provider_api_key:
170
+ return InferenceClient(
171
+ provider=provider,
172
+ model=model,
173
+ api_key=user_provider_api_key
174
+ )
175
+
176
+ # Case 2: Use a provider with user's HF token if available
177
+ elif user_hf_token:
178
+ return InferenceClient(
179
+ provider=provider,
180
+ model=model,
181
+ token=user_hf_token
182
+ )
183
+ else:
184
+ raise ValueError(f"No API key provided for provider '{provider}'. Please provide either a valid {provider} API key or your Hugging Face API key.")
185
+
186
+ except ValueError:
187
+ # Re-raise ValueError for missing API keys
188
+ raise
189
+ except Exception as e:
190
+ logger.error(f"Error creating InferenceClient for provider '{provider}' and model '{model}': {e}")
191
+ # Re-raise all other exceptions
192
+ raise
193
+
194
+
195
+ async def generate_text(prompt: str, llm_config: Optional[dict] = None,
196
+ max_new_tokens: int = 200, temperature: float = 0.7,
197
+ model_override: Optional[str] = None) -> str:
198
+ """
199
+ Helper method to generate text using the appropriate client and configuration.
200
+ Tries chat_completion first (modern standard), falls back to text_generation.
201
+
202
+ Args:
203
+ prompt: The prompt to generate text from
204
+ llm_config: Optional LLM configuration dict
205
+ max_new_tokens: Maximum number of new tokens to generate
206
+ temperature: Temperature for generation
207
+ model_override: Optional model to use instead of the one in llm_config
208
+
209
+ Returns:
210
+ Generated text string
211
+ """
212
+ # Add game master prompt if provided
213
+ if llm_config and llm_config.get('game_master_prompt'):
214
+ game_master_prompt = llm_config['game_master_prompt'].strip()
215
+ if game_master_prompt:
216
+ prompt = f"{game_master_prompt}\n\n{prompt}"
217
+
218
+ # Get the appropriate client
219
+ client = get_inference_client(llm_config)
220
+
221
+ # Determine the model to use
222
+ if model_override:
223
+ model_to_use = model_override
224
+ elif llm_config:
225
+ model_to_use = llm_config.get('model', TEXT_MODEL)
226
+ else:
227
+ model_to_use = TEXT_MODEL
228
+
229
+ # Try chat_completion first (modern standard, more widely supported)
230
+ try:
231
+ messages = [{"role": "user", "content": prompt}]
232
+
233
+ if llm_config and llm_config.get('provider') != 'huggingface':
234
+ # For third-party providers
235
+ completion = await asyncio.get_event_loop().run_in_executor(
236
+ None,
237
+ lambda: client.chat.completions.create(
238
+ messages=messages,
239
+ max_tokens=max_new_tokens,
240
+ temperature=temperature
241
+ )
242
+ )
243
+ else:
244
+ # For HuggingFace models, specify the model
245
+ completion = await asyncio.get_event_loop().run_in_executor(
246
+ None,
247
+ lambda: client.chat.completions.create(
248
+ model=model_to_use,
249
+ messages=messages,
250
+ max_tokens=max_new_tokens,
251
+ temperature=temperature
252
+ )
253
+ )
254
+
255
+ # Extract the generated text from the chat completion response
256
+ return completion.choices[0].message.content
257
+
258
+ except Exception as e:
259
+ error_message = str(e).lower()
260
+ # Check if the error is related to task compatibility or API not supported
261
+ if ("not supported for task" in error_message or
262
+ "conversational" in error_message or
263
+ "chat" in error_message):
264
+ logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
265
+
266
+ # Fall back to text_generation API
267
+ try:
268
+ if llm_config and llm_config.get('provider') != 'huggingface':
269
+ # For third-party providers
270
+ response = await asyncio.get_event_loop().run_in_executor(
271
+ None,
272
+ lambda: client.text_generation(
273
+ prompt,
274
+ max_new_tokens=max_new_tokens,
275
+ temperature=temperature
276
+ )
277
+ )
278
+ else:
279
+ # For HuggingFace models, specify the model
280
+ response = await asyncio.get_event_loop().run_in_executor(
281
+ None,
282
+ lambda: client.text_generation(
283
+ prompt,
284
+ model=model_to_use,
285
+ max_new_tokens=max_new_tokens,
286
+ temperature=temperature
287
+ )
288
+ )
289
+ return response
290
+
291
+ except Exception as text_error:
292
+ logger.error(f"Both chat_completion and text_generation failed: {text_error}")
293
+ raise text_error
294
+ else:
295
+ # Re-raise the original error if it's not a task compatibility issue
296
+ logger.error(f"chat_completion failed with non-compatibility error: {e}")
297
+ raise e
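A hedged usage sketch of generate_text: the first call exercises the built-in path (server HF_TOKEN plus TEXT_MODEL), and the commented llm_config shows the keys read by get_inference_client with placeholder values only:

import asyncio
from server.llm_utils import generate_text

async def main() -> None:
    # Built-in provider: requires the server's HF_TOKEN to be configured.
    caption = await generate_text(
        "Describe a quiet mountain lake at dawn.",
        max_new_tokens=120,
        temperature=0.7,
    )
    print(caption)

    # A user-supplied configuration would look roughly like this (placeholder values):
    # llm_config = {"provider": "novita", "model": "some-org/some-model", "hf_token": "hf_xxx"}
    # caption = await generate_text("...", llm_config=llm_config)

asyncio.run(main())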
server/logging_utils.py ADDED
@@ -0,0 +1,132 @@
1
+ """
2
+ Colored logging utilities for the TikSlop server.
3
+ """
4
+ import logging
5
+ import re
6
+
7
+ # ANSI color codes
8
+ class Colors:
9
+ RESET = '\033[0m'
10
+ BOLD = '\033[1m'
11
+ DIM = '\033[2m'
12
+
13
+ # Foreground colors
14
+ BLACK = '\033[30m'
15
+ RED = '\033[31m'
16
+ GREEN = '\033[32m'
17
+ YELLOW = '\033[33m'
18
+ BLUE = '\033[34m'
19
+ MAGENTA = '\033[35m'
20
+ CYAN = '\033[36m'
21
+ WHITE = '\033[37m'
22
+
23
+ # Bright colors
24
+ BRIGHT_BLACK = '\033[90m'
25
+ BRIGHT_RED = '\033[91m'
26
+ BRIGHT_GREEN = '\033[92m'
27
+ BRIGHT_YELLOW = '\033[93m'
28
+ BRIGHT_BLUE = '\033[94m'
29
+ BRIGHT_MAGENTA = '\033[95m'
30
+ BRIGHT_CYAN = '\033[96m'
31
+ BRIGHT_WHITE = '\033[97m'
32
+
33
+ # Background colors
34
+ BG_BLACK = '\033[40m'
35
+ BG_RED = '\033[41m'
36
+ BG_GREEN = '\033[42m'
37
+ BG_YELLOW = '\033[43m'
38
+ BG_BLUE = '\033[44m'
39
+ BG_MAGENTA = '\033[45m'
40
+ BG_CYAN = '\033[46m'
41
+ BG_WHITE = '\033[47m'
42
+
43
+ class ColoredFormatter(logging.Formatter):
44
+ """Custom formatter with colors and patterns"""
45
+
46
+ def __init__(self):
47
+ super().__init__()
48
+
49
+ def format(self, record):
50
+ # Color mapping for log levels
51
+ level_colors = {
52
+ 'DEBUG': Colors.BRIGHT_BLACK,
53
+ 'INFO': Colors.BRIGHT_CYAN,
54
+ 'WARNING': Colors.BRIGHT_YELLOW,
55
+ 'ERROR': Colors.BRIGHT_RED,
56
+ 'CRITICAL': Colors.BRIGHT_MAGENTA + Colors.BOLD
57
+ }
58
+
59
+ # Format timestamp
60
+ timestamp = f"{Colors.DIM}{self.formatTime(record, '%H:%M:%S.%f')[:-3]}{Colors.RESET}"
61
+
62
+ # Format level with color
63
+ level_color = level_colors.get(record.levelname, Colors.WHITE)
64
+ level = f"{level_color}{record.levelname:>7}{Colors.RESET}"
65
+
66
+ # Format logger name
67
+ logger_name = f"{Colors.BRIGHT_BLACK}[{record.name}]{Colors.RESET}"
68
+
69
+ # Format message with keyword highlighting
70
+ message = self.colorize_message(record.getMessage())
71
+
72
+ return f"{timestamp} {level} {logger_name} {message}"
73
+
74
+ def colorize_message(self, message):
75
+ """Add colors to specific keywords and patterns in the message"""
76
+
77
+ # Highlight request IDs in brackets (gray like logger names)
78
+ message = re.sub(r'\[([a-f0-9-]{36})\]', f'{Colors.BRIGHT_BLACK}[\\1]{Colors.RESET}', message)
79
+
80
+ # Highlight user IDs
81
+ message = re.sub(r'user ([a-zA-Z0-9-]+)', f'user {Colors.BRIGHT_BLUE}\\1{Colors.RESET}', message)
82
+
83
+ # Highlight actions
84
+ message = re.sub(r'(generate_video|search|simulate|join_chat|leave_chat|chat_message)',
85
+ f'{Colors.BRIGHT_YELLOW}\\1{Colors.RESET}', message)
86
+
87
+ # Highlight status keywords
88
+ message = re.sub(r'\b(success|successful|completed|connected|ready)\b',
89
+ f'{Colors.BRIGHT_GREEN}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
90
+
91
+ message = re.sub(r'\b(error|failed|timeout|exception)\b',
92
+ f'{Colors.BRIGHT_RED}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
93
+
94
+ message = re.sub(r'\b(warning|retry|reconnect)\b',
95
+ f'{Colors.BRIGHT_YELLOW}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
96
+
97
+ # Highlight numbers (timing, counts, etc.) but not those inside UUIDs
98
+ message = re.sub(r'(?<![a-f0-9-])\b(\d+\.?\d*)(s|ms|chars|bytes)?\b(?![a-f0-9-])',
99
+ f'{Colors.BRIGHT_MAGENTA}\\1{Colors.CYAN}\\2{Colors.RESET}', message)
100
+
101
+ # Highlight roles
102
+ message = re.sub(r'\b(role|user_role)=([a-zA-Z]+)',
103
+ f'\\1={Colors.BRIGHT_CYAN}\\2{Colors.RESET}', message)
104
+
105
+ # Highlight titles in quotes
106
+ message = re.sub(r"title='([^']*)'", f"title='{Colors.GREEN}\\1{Colors.RESET}'", message)
107
+
108
+ return message
109
+
110
+ def setup_colored_logging():
111
+ """Set up colored logging for the entire application"""
112
+
113
+ # Configure logging with colors
114
+ logging.basicConfig(
115
+ level=logging.INFO,
116
+ handlers=[
117
+ logging.StreamHandler()
118
+ ]
119
+ )
120
+
121
+ # Set up colored formatter
122
+ handler = logging.StreamHandler()
123
+ handler.setFormatter(ColoredFormatter())
124
+
125
+ # Apply to root logger and clear default handlers
126
+ root_logger = logging.getLogger()
127
+ root_logger.handlers.clear()
128
+ root_logger.addHandler(handler)
129
+
130
+ def get_logger(name):
131
+ """Get a logger with the given name"""
132
+ return logging.getLogger(name)
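Typical usage mirrors the api.py entry point: call setup_colored_logging() once at startup, then fetch named loggers. The UUID and duration below are sample values chosen to trigger the highlighting patterns:

from server.logging_utils import setup_colored_logging, get_logger

setup_colored_logging()
logger = get_logger("server.demo")

# Request IDs in brackets, action names like "generate_video" and durations
# such as "2.31s" are colorized by ColoredFormatter.colorize_message().
logger.info("[8b1c2d3e-0000-4000-8000-000000000000] generate_video completed in 2.31s")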
server/models.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ Data models and dataclasses used throughout the application.
3
+ """
4
+ from dataclasses import dataclass
5
+ from typing import Literal, Set, List, Dict, Any
6
+
7
+
8
+ # User role type
9
+ UserRole = Literal['anon', 'normal', 'pro', 'admin']
10
+
11
+
12
+ @dataclass
13
+ class Endpoint:
14
+ """Represents a video generation endpoint."""
15
+ id: int
16
+ url: str
17
+ busy: bool = False
18
+ last_used: float = 0
19
+ error_count: int = 0
20
+ error_until: float = 0 # Timestamp until which this endpoint is considered in error state
21
+
22
+
23
+ class ChatRoom:
24
+ """Represents a chat room for a video."""
25
+ def __init__(self):
26
+ self.messages: List[Dict[str, Any]] = []
27
+ self.connected_clients: Set[Any] = set()
28
+ self.max_history: int = 100
29
+
30
+ def add_message(self, message: Dict[str, Any]) -> None:
31
+ """Add a message to the chat room history."""
32
+ self.messages.append(message)
33
+ if len(self.messages) > self.max_history:
34
+ self.messages.pop(0)
35
+
36
+ def get_recent_messages(self, limit: int = 50) -> List[Dict[str, Any]]:
37
+ """Get the most recent messages from the chat room."""
38
+ return self.messages[-limit:]
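A small sketch of these data models in use (the endpoint URL is a placeholder):

from server.models import ChatRoom, Endpoint

room = ChatRoom()
room.add_message({"username": "demo-user", "data": "more squirrels plz"})
print(room.get_recent_messages(limit=10))  # history is capped at max_history (100)

endpoint = Endpoint(id=1, url="https://example.endpoints.huggingface.cloud")
print(endpoint.busy, endpoint.error_count)  # False 0 until the manager touches it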
server/utils.py ADDED
@@ -0,0 +1,131 @@
1
+ """
2
+ Generic utility functions used across the application.
3
+ """
4
+ import random
5
+ import re
6
+
7
+
8
+ def generate_seed():
9
+ """Generate a random positive 32-bit integer seed."""
10
+ return random.randint(0, 2**32 - 1)
11
+
12
+
13
+ def sanitize_yaml_response(response_text: str) -> str:
14
+ """
15
+ Sanitize and format AI response into valid YAML.
16
+ Returns properly formatted YAML string.
17
+ """
18
+
19
+ # Pre-processing: Remove code block markers
20
+ if response_text.startswith("```yaml"):
21
+ # Remove the "```yaml" at the beginning and closing ```
22
+ response_text = response_text[7:] # Remove "```yaml" (7 characters)
23
+ if response_text.endswith("```"):
24
+ response_text = response_text[:-3] # Remove closing ```
25
+ response_text = response_text.strip()
26
+ elif response_text.startswith("```"):
27
+ # Remove the "```" at the beginning and closing ```
28
+ response_text = response_text[3:] # Remove opening ```
29
+ if response_text.endswith("```"):
30
+ response_text = response_text[:-3] # Remove closing ```
31
+ response_text = response_text.strip()
32
+
33
+ # Handle edge case where the LLM might have continued the prompt
34
+ # e.g., if the response starts with the incomplete prompt we provided
35
+ if response_text.startswith('title: \\"'):
36
+ # Remove the incomplete prompt prefix
37
+ response_text = response_text[9:].strip()
38
+
39
+ # Check if it already has a proper YAML structure
40
+ if not response_text.startswith(('title:', 'title :')):
41
+ # Only wrap with title if it doesn't already have one
42
+ # The sanitize function will handle escaping
43
+ response_text = f'title: {response_text}'
44
+
45
+ # Split on first occurrence of ``` to handle any remaining code blocks
46
+ response_text = response_text.split("```")[0]
47
+
48
+ # Remove any markdown code block indicators and YAML document markers
49
+ clean_text = re.sub(r'```yaml|```|---|\.\.\.$', '', response_text.strip())
50
+
51
+ # Handle the specific case where LLM duplicates 'title:' in the value
52
+ # e.g., title: "title: "Something"" -> title: "Something"
53
+ clean_text = re.sub(r'title:\s*"title:\s*"([^"]+)""?', r'title: "\1"', clean_text)
54
+ clean_text = re.sub(r'title:\s*\'title:\s*\'([^\']+)\'\'?', r'title: \'\1\'', clean_text)
55
+ clean_text = re.sub(r'title:\s*"title:\s*\'([^\']+)\'"?', r'title: "\1"', clean_text)
56
+ clean_text = re.sub(r'title:\s*\'title:\s*"([^"]+)"\'?', r'title: \'\1\'', clean_text)
57
+
58
+ # Also handle case where title appears twice without quotes
59
+ clean_text = re.sub(r'title:\s*title:\s*(.+)$', r'title: \1', clean_text, flags=re.MULTILINE)
60
+
61
+ # Split into lines and process each line
62
+ lines = clean_text.split('\n')
63
+ sanitized_lines = []
64
+ current_field = None
65
+
66
+ for line in lines:
67
+ stripped = line.strip()
68
+ if not stripped:
69
+ continue
70
+
71
+ # Handle field starts
72
+ if stripped.startswith('title:') or stripped.startswith('description:'):
73
+ # Ensure proper YAML format with space after colon and proper quoting
74
+ field_name = stripped.split(':', 1)[0]
75
+ field_value = stripped.split(':', 1)[1].strip()
76
+
77
+ # Remove outer quotes first
78
+ if (field_value.startswith('"') and field_value.endswith('"')) or \
79
+ (field_value.startswith("'") and field_value.endswith("'")):
80
+ field_value = field_value[1:-1]
81
+
82
+ # Check for nested title pattern again (in case it wasn't caught by regex)
83
+ if field_name == 'title' and field_value.lower().startswith('title:'):
84
+ # Remove the nested 'title:' prefix
85
+ field_value = field_value[6:].strip().strip('"\'')
86
+
87
+ # Escape any internal quotes
88
+ field_value = field_value.replace('"', '\\"')
89
+
90
+ # Always quote the value to ensure proper YAML formatting
91
+ field_value = f'"{field_value}"'
92
+
93
+ sanitized_lines.append(f"{field_name}: {field_value}")
94
+ current_field = field_name
95
+
96
+ elif stripped.startswith('tags:'):
97
+ sanitized_lines.append('tags:')
98
+ current_field = 'tags'
99
+
100
+ elif stripped.startswith('-') and current_field == 'tags':
101
+ # Process tag values
102
+ tag = stripped[1:].strip().strip('"\'')
103
+ if tag:
104
+ # Clean and format tag
105
+ tag = re.sub(r'[^\x00-\x7F]+', '', tag) # Remove non-ASCII
106
+ tag = re.sub(r'[^a-zA-Z0-9\s-]', '', tag) # Keep only alphanumeric and hyphen
107
+ tag = tag.strip().lower().replace(' ', '-')
108
+ if tag:
109
+ sanitized_lines.append(f" - {tag}")
110
+
111
+ elif current_field in ['title', 'description']:
112
+ # Handle multi-line title/description continuation
113
+ value = stripped.strip('"\'')
114
+ if value:
115
+ # Append to previous line (but within the quotes)
116
+ prev = sanitized_lines[-1]
117
+ # Remove the closing quote, append the value, and add the quote back
118
+ if prev.endswith('"'):
119
+ sanitized_lines[-1] = f'{prev[:-1]} {value}"'
120
+
121
+ # Ensure the YAML has all required fields
122
+ required_fields = {'title', 'description', 'tags'}
123
+ found_fields = {line.split(':')[0].strip() for line in sanitized_lines if ':' in line}
124
+
125
+ for field in required_fields - found_fields:
126
+ if field == 'tags':
127
+ sanitized_lines.extend(['tags:', ' - default'])
128
+ else:
129
+ sanitized_lines.append(f'{field}: "No {field} provided"')
130
+
131
+ return '\n'.join(sanitized_lines)
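A hedged sketch of sanitize_yaml_response on a typical messy LLM reply; PyYAML is assumed to be available for the final parse, and the reply text itself is invented:

import yaml  # PyYAML, assumed installed for this demo
from server.utils import generate_seed, sanitize_yaml_response

raw = '''```yaml
title: "title: "Squirrel Park Webcam""
description: Documentary footage, wide shot. Squirrels play in lush grass.
```'''

cleaned = sanitize_yaml_response(raw)
data = yaml.safe_load(cleaned)
print(data["title"])   # -> Squirrel Park Webcam (duplicated 'title:' removed)
print(data["tags"])    # -> ['default'] (missing field filled with a default)
print(generate_seed()) # -> random positive 32-bit integer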
server/video_utils.py ADDED
@@ -0,0 +1,174 @@
1
+ """
2
+ Video generation utilities for HuggingFace endpoints and Gradio spaces.
3
+ """
4
+ import asyncio
5
+ import time
6
+ import uuid
7
+ import logging
8
+ from typing import Dict
9
+ from aiohttp import ClientSession
10
+ from gradio_client import Client
11
+ from .models import UserRole, Endpoint
12
+ from .api_config import HF_TOKEN, GUIDANCE_SCALE
13
+ from .logging_utils import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ async def generate_video_content_with_inference_endpoints(
19
+ endpoint_manager, prompt: str, negative_prompt: str, width: int,
20
+ height: int, num_frames: int, num_inference_steps: int,
21
+ frame_rate: int, seed: int, options: dict, user_role: UserRole
22
+ ) -> str:
23
+ """
24
+ Internal method to generate video content with specific parameters.
25
+ Used by both regular video generation and thumbnail generation.
26
+ """
27
+ is_thumbnail = options.get('thumbnail', False)
28
+ request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
29
+ video_id = options.get('video_id', 'unknown')
30
+
31
+ # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
32
+
33
+ json_payload = {
34
+ "inputs": {
35
+ "prompt": prompt,
36
+ },
37
+ "parameters": {
38
+ # ------------------- settings for LTX-Video -----------------------
39
+ "negative_prompt": negative_prompt,
40
+ "width": width,
41
+ "height": height,
42
+ "num_frames": num_frames,
43
+ "num_inference_steps": num_inference_steps,
44
+ "guidance_scale": options.get('guidance_scale', GUIDANCE_SCALE),
45
+ "seed": seed,
46
+
47
+ # ------------------- settings for Varnish -----------------------
48
+ "double_num_frames": False, # <- False for real-time generation
49
+ "fps": frame_rate,
50
+ "super_resolution": False, # <- False for real-time generation
51
+ "grain_amount": 0, # No film grain (on low-res, low-quality generation the effects aren't worth it + it adds weight to the MP4 payload)
52
+ }
53
+ }
54
+
55
+ # Add thumbnail flag to help with metrics and debugging
56
+ if is_thumbnail:
57
+ json_payload["metadata"] = {
58
+ "is_thumbnail": True,
59
+ "thumbnail_version": "1.0",
60
+ "request_id": request_id
61
+ }
62
+
63
+ # logger.info(f"[{request_id}] Waiting for an available endpoint...")
64
+ async with endpoint_manager.get_endpoint() as endpoint:
65
+ # logger.info(f"[{request_id}] Using endpoint {endpoint.id} for generation")
66
+
67
+ try:
68
+ async with ClientSession() as session:
69
+ #logger.info(f"[{request_id}] Sending request to endpoint {endpoint.id}: {endpoint.url}")
70
+ start_time = time.time()
71
+
72
+ # Proceed with actual request
73
+ async with session.post(
74
+ endpoint.url,
75
+ headers={
76
+ "Accept": "application/json",
77
+ "Authorization": f"Bearer {HF_TOKEN}",
78
+ "Content-Type": "application/json",
79
+ "X-Request-ID": request_id # Add request ID to headers
80
+ },
81
+ json=json_payload,
82
+ timeout=12 # Extended timeout for thumbnails (was 8s)
83
+ ) as response:
84
+ request_duration = time.time() - start_time
85
+ #logger.info(f"[{request_id}] Received response from endpoint {endpoint.id} in {request_duration:.2f}s: HTTP {response.status}")
86
+
87
+ if response.status != 200:
88
+ error_text = await response.text()
89
+ logger.error(f"[{request_id}] Failed response: {error_text}")
90
+ # Mark endpoint as in error state
91
+ await endpoint_manager.mark_endpoint_error(endpoint)
92
+ if "paused" in error_text:
93
+ logger.error(f"[{request_id}] Endpoint is paused")
94
+ return ""
95
+ raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
96
+
97
+ result = await response.json()
98
+ #logger.info(f"[{request_id}] Successfully parsed JSON response")
99
+
100
+ if "error" in result:
101
+ error_msg = result['error']
102
+ logger.error(f"[{request_id}] Error in response: {error_msg}")
103
+ # Mark endpoint as in error state
104
+ await endpoint_manager.mark_endpoint_error(endpoint)
105
+ if "paused" in str(error_msg).lower():
106
+ logger.error(f"[{request_id}] Endpoint is paused")
107
+ return ""
108
+ raise Exception(f"Video generation failed: {error_msg}")
109
+
110
+ video_data_uri = result.get("video")
111
+ if not video_data_uri:
112
+ logger.error(f"[{request_id}] No video data in response")
113
+ # Mark endpoint as in error state
114
+ await endpoint_manager.mark_endpoint_error(endpoint)
115
+ raise Exception("No video data in response")
116
+
117
+ # Get data size
118
+ data_size = len(video_data_uri)
119
+ #logger.info(f"[{request_id}] Received video data: {data_size} chars")
120
+
121
+ # Reset error count on successful call
122
+ endpoint.error_count = 0
123
+ endpoint.error_until = 0
124
+
125
+ return video_data_uri
126
+
127
+ except asyncio.TimeoutError:
128
+ # Handle timeout specifically
129
+ logger.error(f"[{request_id}] Timeout occurred after {time.time() - start_time:.2f}s")
130
+ await endpoint_manager.mark_endpoint_error(endpoint, is_timeout=True)
131
+ return ""
132
+ except Exception as e:
133
+ # Handle all other exceptions
134
+ logger.error(f"[{request_id}] Exception during video generation: {str(e)}")
135
+ if not isinstance(e, asyncio.TimeoutError): # Already handled above
136
+ await endpoint_manager.mark_endpoint_error(endpoint)
137
+ return ""
138
+
139
+
140
+ async def generate_video_content_with_gradio(
141
+ endpoint_manager, prompt: str, negative_prompt: str, width: int,
142
+ height: int, num_frames: int, num_inference_steps: int,
143
+ frame_rate: int, seed: int, options: dict, user_role: UserRole
144
+ ) -> str:
145
+ """
146
+ Internal method to generate video content with specific parameters.
147
+ Used by both regular video generation and thumbnail generation.
148
+ This version uses our generic gradio space.
149
+ """
150
+ is_thumbnail = options.get('thumbnail', False)
151
+ request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
152
+ video_id = options.get('video_id', 'unknown')
153
+
154
+ # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
155
+
156
+ # Define the synchronous function
157
+ def _sync_gradio_call():
158
+ client = Client("jbilcke-hf/fast-rendering-node", hf_token=HF_TOKEN)
159
+
160
+ return client.predict(
161
+ prompt=prompt,
162
+ seed=seed,
163
+ fps=8, # frame_rate, # attention, right now tilslop asks for 25 FPS
164
+ width=640, # width, # attention, right now tikslop asks for 1152
165
+ height=320, # height, # attention, righ tnow tikslop asks for 640
166
+ duration=3, # num_frames // frame_rate
167
+ )
168
+
169
+ # Run in a thread using asyncio.to_thread (Python 3.9+)
170
+ video_data_uri = await asyncio.to_thread(_sync_gradio_call)
171
+
172
+ return video_data_uri
173
+
174
+
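A hedged call sketch for the Gradio path; it needs HF_TOKEN and network access to the jbilcke-hf/fast-rendering-node space, and the prompt/options values are placeholders (real calls come from api_core):

import asyncio
from server.endpoint_manager import EndpointManager
from server.video_utils import generate_video_content_with_gradio

async def main() -> None:
    data_uri = await generate_video_content_with_gradio(
        endpoint_manager=EndpointManager(),  # unused by the Gradio path, kept for API symmetry
        prompt="documentary footage, wide shot, squirrels playing in lush grass",
        negative_prompt="",
        width=640,
        height=320,
        num_frames=24,
        num_inference_steps=8,
        frame_rate=8,
        seed=42,
        options={"video_id": "demo", "request_id": "demo-req"},
        user_role="anon",
    )
    print(str(data_uri)[:80])

asyncio.run(main())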