jbilcke-hf HF Staff committed on
Commit 7dadc22 · 1 Parent(s): 5707a78
.claude/settings.local.json CHANGED
@@ -1,7 +1,10 @@
1
  {
2
  "permissions": {
3
  "allow": [
4
- "Bash(flutter build:*)"
 
 
 
5
  ],
6
  "deny": []
7
  },
 
1
  {
2
  "permissions": {
3
  "allow": [
4
+ "Bash(flutter build:*)",
5
+ "Bash(mv:*)",
6
+ "Bash(ls:*)",
7
+ "Bash(python:*)"
8
  ],
9
  "deny": []
10
  },
api.py CHANGED
@@ -8,17 +8,16 @@ import uuid
8
  from aiohttp import web, WSMsgType
9
  from typing import Dict, Any
10
 
11
- from api_core import VideoGenerationAPI
12
- from api_session import SessionManager
13
- from api_metrics import MetricsTracker
14
- from api_config import *
15
 
16
- # Configure logging
17
- logging.basicConfig(
18
- level=logging.INFO,
19
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
20
- )
21
- logger = logging.getLogger(__name__)
22
 
23
  # Create global session and metrics managers
24
  session_manager = SessionManager()
@@ -175,6 +174,8 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
175
  if action in ['join_chat', 'leave_chat', 'chat_message']:
176
  await user_session.chat_queue.put(data)
177
  elif action in ['generate_video']:
 
 
178
  await user_session.video_queue.put(data)
179
  elif action == 'search':
180
  await user_session.search_queue.put(data)
 
8
  from aiohttp import web, WSMsgType
9
  from typing import Dict, Any
10
 
11
+ from server.api_core import VideoGenerationAPI
12
+ from server.api_session import SessionManager
13
+ from server.api_metrics import MetricsTracker
14
+ from server.api_config import *
15
 
16
+ # Set up colored logging
17
+ from server.logging_utils import setup_colored_logging, get_logger
18
+
19
+ setup_colored_logging()
20
+ logger = get_logger(__name__)
 
21
 
22
  # Create global session and metrics managers
23
  session_manager = SessionManager()
 
174
  if action in ['join_chat', 'leave_chat', 'chat_message']:
175
  await user_session.chat_queue.put(data)
176
  elif action in ['generate_video']:
177
+ request_id = data.get('requestId', 'unknown')
178
+ #logger.info(f"[{request_id}] Received generate_video request from user {user_id}, adding to video queue")
179
  await user_session.video_queue.put(data)
180
  elif action == 'search':
181
  await user_session.search_queue.put(data)
assets/config/default.yaml CHANGED
@@ -21,9 +21,10 @@ advertising:
21
  link: https://huggingface.co/docs/smolagents/index
22
 
23
  simulation:
24
- # how often the description should evolve (in seconds)
25
- # setting to 0 disables description evolution
26
- sim_loop_frequency_in_sec: 8
 
27
 
28
  video:
29
  # default negative prompt to filter harmful content
 
21
  link: https://huggingface.co/docs/smolagents/index
22
 
23
  simulation:
24
+ # delay to wait after each simulation loop (in seconds)
25
+ sim_loop_delay_in_sec: 8
26
+
27
+ enable_sim_loop: false
28
 
29
  video:
30
  # default negative prompt to filter harmful content
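On the Flutter side, these two keys are read through the Configuration getters touched later in this commit (lib/config/config.dart). A minimal usage sketch, assuming the existing Configuration.instance singleton, Flutter's debugPrint, and an import path that may differ in the real project:

    // Sketch only: reads the renamed simulation keys via the getters from this commit.
    import 'package:flutter/foundation.dart';
    import 'config/config.dart'; // assumed path

    void logSimulationConfig() {
      final config = Configuration.instance;
      if (!config.enableSimLoop) {
        debugPrint('SIMULATION: disabled by config');
        return;
      }
      debugPrint('SIMULATION: loop delay is ${config.simLoopDelayInSec}s');
    }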
assets/config/tikslop.yaml CHANGED
@@ -25,11 +25,10 @@ advertising:
25
 
26
  simulation:
27
  # whether to enable simulation loop to evolve descriptions over time
28
- enable_sim_loop: false
29
 
30
- # how often the description should evolve (in seconds)
31
- # setting to 0 disables description evolution
32
- sim_loop_frequency_in_sec: 8
33
 
34
  # it's OK to use high values here,
35
  # because some of those values are limited by the backend config,
 
25
 
26
  simulation:
27
  # whether to enable simulation loop to evolve descriptions over time
28
+ enable_sim_loop: true
29
 
30
+ # delay to wait after each simulation loop (in seconds)
31
+ sim_loop_delay_in_sec: 8
 
32
 
33
  # it's OK to use high values here,
34
  # because some of those values are limited by the backend config,
build/web/flutter_bootstrap.js CHANGED
@@ -38,6 +38,6 @@ _flutter.buildConfig = {"engineRevision":"1c9c20e7c3dd48c66f400a24d48ea806b4ab31
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
- serviceWorkerVersion: "401217633"
42
  }
43
  });
 
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
+ serviceWorkerVersion: "2752272678"
42
  }
43
  });
build/web/flutter_service_worker.js CHANGED
@@ -3,12 +3,12 @@ const MANIFEST = 'flutter-app-manifest';
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
- const RESOURCES = {"flutter_bootstrap.js": "4704f10d11e3b5570203ced46f420e01",
7
  "version.json": "68350cac7987de2728345c72918dd067",
8
  "tikslop.png": "570e1db759046e2d224fef729983634e",
9
  "index.html": "3a7029b3672560e7938aab6fa4d30a46",
10
  "/": "3a7029b3672560e7938aab6fa4d30a46",
11
- "main.dart.js": "3e9b078cf004be6fc60825295d810cf0",
12
  "tikslop.svg": "26140ba0d153b213b122bc6ebcc17f6c",
13
  "flutter.js": "888483df48293866f9f41d3d9274a779",
14
  "favicon.png": "c8a183c516004e648a7bac7497c89b97",
 
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
+ const RESOURCES = {"flutter_bootstrap.js": "b92f2a2de5eedfe76230ecb6dfa2b1fe",
7
  "version.json": "68350cac7987de2728345c72918dd067",
8
  "tikslop.png": "570e1db759046e2d224fef729983634e",
9
  "index.html": "3a7029b3672560e7938aab6fa4d30a46",
10
  "/": "3a7029b3672560e7938aab6fa4d30a46",
11
+ "main.dart.js": "6491f49e4f56ea8d316d8372e4e08020",
12
  "tikslop.svg": "26140ba0d153b213b122bc6ebcc17f6c",
13
  "flutter.js": "888483df48293866f9f41d3d9274a779",
14
  "favicon.png": "c8a183c516004e648a7bac7497c89b97",
build/web/index.html CHANGED
@@ -156,7 +156,7 @@
156
  </script>
157
 
158
  <!-- Add version parameter for cache busting -->
159
- <script src="flutter_bootstrap.js?v=1753366329" async></script>
160
 
161
  <!-- Add cache busting script -->
162
  <script>
 
156
  </script>
157
 
158
  <!-- Add version parameter for cache busting -->
159
+ <script src="flutter_bootstrap.js?v=1753441877" async></script>
160
 
161
  <!-- Add cache busting script -->
162
  <script>
build/web/main.dart.js CHANGED
The diff for this file is too large to render. See raw diff
 
lib/config/config.dart CHANGED
@@ -132,8 +132,8 @@ class Configuration {
132
  bool get enableSimLoop =>
133
  _config['simulation']?['enable_sim_loop'] ?? true;
134
 
135
- int get simLoopFrequencyInSec =>
136
- _config['simulation']?['sim_loop_frequency_in_sec'] ?? 0;
137
 
138
  // Computed properties
139
 
 
132
  bool get enableSimLoop =>
133
  _config['simulation']?['enable_sim_loop'] ?? true;
134
 
135
+ int get simLoopDelayInSec =>
136
+ _config['simulation']?['sim_loop_delay_in_sec'] ?? 0;
137
 
138
  // Computed properties
139
 
lib/screens/settings_screen.dart CHANGED
@@ -17,12 +17,14 @@ class _SettingsScreenState extends State<SettingsScreen> {
17
  final _promptController = TextEditingController();
18
  final _negativePromptController = TextEditingController();
19
  final _hfApiKeyController = TextEditingController();
 
20
  final _llmApiKeyController = TextEditingController();
21
  final _modelNameController = TextEditingController();
22
  final _settingsService = SettingsService();
23
  final _availabilityService = ModelAvailabilityService();
24
  bool _showSceneDebugInfo = false;
25
  bool _enableSimulation = true;
 
26
  String _selectedLlmProvider = 'built-in';
27
  String _selectedLlmModel = 'meta-llama/Llama-3.2-3B-Instruct';
28
  LLMProvider? _currentProvider;
@@ -39,9 +41,11 @@ class _SettingsScreenState extends State<SettingsScreen> {
39
  _promptController.text = _settingsService.videoPromptPrefix;
40
  _negativePromptController.text = _settingsService.negativeVideoPrompt;
41
  _hfApiKeyController.text = _settingsService.huggingfaceApiKey;
 
42
  _llmApiKeyController.text = _settingsService.llmApiKey;
43
  _showSceneDebugInfo = _settingsService.showSceneDebugInfo;
44
  _enableSimulation = _settingsService.enableSimulation;
 
45
 
46
  // Auto-select built-in model if no HF API key
47
  if (_settingsService.huggingfaceApiKey.isEmpty) {
@@ -73,6 +77,7 @@ class _SettingsScreenState extends State<SettingsScreen> {
73
  _promptController.dispose();
74
  _negativePromptController.dispose();
75
  _hfApiKeyController.dispose();
 
76
  _llmApiKeyController.dispose();
77
  _modelNameController.dispose();
78
  super.dispose();
@@ -285,6 +290,20 @@ class _SettingsScreenState extends State<SettingsScreen> {
285
  },
286
  ),
287
  const SizedBox(height: 16),
288
  // Model selection dropdown
289
  DropdownButtonFormField<String>(
290
  decoration: InputDecoration(
@@ -713,8 +732,6 @@ class _SettingsScreenState extends State<SettingsScreen> {
713
  _settingsService.setShowSceneDebugInfo(value);
714
  },
715
  ),
716
- /*
717
- let's disable this for now, I still need to work on this
718
  SwitchListTile(
719
  title: const Text('Enable world simulator engine'),
720
  subtitle: const Text('Allow video descriptions to evolve over time using a LLM (this consumes tokens, your Hugging Face account will be billed)'),
@@ -726,7 +743,48 @@ class _SettingsScreenState extends State<SettingsScreen> {
726
  _settingsService.setEnableSimulation(value);
727
  },
728
  ),
729
- */
730
  const SizedBox(height: 16),
731
  // Clear device connections button
732
  ListTile(
 
17
  final _promptController = TextEditingController();
18
  final _negativePromptController = TextEditingController();
19
  final _hfApiKeyController = TextEditingController();
20
+ final _gameMasterPromptController = TextEditingController();
21
  final _llmApiKeyController = TextEditingController();
22
  final _modelNameController = TextEditingController();
23
  final _settingsService = SettingsService();
24
  final _availabilityService = ModelAvailabilityService();
25
  bool _showSceneDebugInfo = false;
26
  bool _enableSimulation = true;
27
+ int _simLoopDelayInSec = 5;
28
  String _selectedLlmProvider = 'built-in';
29
  String _selectedLlmModel = 'meta-llama/Llama-3.2-3B-Instruct';
30
  LLMProvider? _currentProvider;
 
41
  _promptController.text = _settingsService.videoPromptPrefix;
42
  _negativePromptController.text = _settingsService.negativeVideoPrompt;
43
  _hfApiKeyController.text = _settingsService.huggingfaceApiKey;
44
+ _gameMasterPromptController.text = _settingsService.gameMasterPrompt;
45
  _llmApiKeyController.text = _settingsService.llmApiKey;
46
  _showSceneDebugInfo = _settingsService.showSceneDebugInfo;
47
  _enableSimulation = _settingsService.enableSimulation;
48
+ _simLoopDelayInSec = _settingsService.simLoopDelayInSec;
49
 
50
  // Auto-select built-in model if no HF API key
51
  if (_settingsService.huggingfaceApiKey.isEmpty) {
 
77
  _promptController.dispose();
78
  _negativePromptController.dispose();
79
  _hfApiKeyController.dispose();
80
+ _gameMasterPromptController.dispose();
81
  _llmApiKeyController.dispose();
82
  _modelNameController.dispose();
83
  super.dispose();
 
290
  },
291
  ),
292
  const SizedBox(height: 16),
293
+ TextField(
294
+ controller: _gameMasterPromptController,
295
+ decoration: const InputDecoration(
296
+ labelText: 'Game Master Prompt',
297
+ hintText: 'Keep things fun and kid-friendly.',
298
+ helperText: 'Additional instructions applied to all LLM requests (search, captions, simulations)',
299
+ helperMaxLines: 2,
300
+ ),
301
+ maxLines: 3,
302
+ onChanged: (value) {
303
+ _settingsService.setGameMasterPrompt(value);
304
+ },
305
+ ),
306
+ const SizedBox(height: 16),
307
  // Model selection dropdown
308
  DropdownButtonFormField<String>(
309
  decoration: InputDecoration(
 
732
  _settingsService.setShowSceneDebugInfo(value);
733
  },
734
  ),
 
 
735
  SwitchListTile(
736
  title: const Text('Enable world simulator engine'),
737
  subtitle: const Text('Allow video descriptions to evolve over time using a LLM (this consumes tokens, your Hugging Face account will be billed)'),
 
743
  _settingsService.setEnableSimulation(value);
744
  },
745
  ),
746
+ // Only show simulation delay setting if user has HF API key
747
+ if (_hfApiKeyController.text.isNotEmpty) ...[
748
+ const SizedBox(height: 8),
749
+ ListTile(
750
+ title: const Text('Simulation Loop Delay'),
751
+ subtitle: Text('Delay between simulation iterations: ${_simLoopDelayInSec}s (Warning: Short delays consume more LLM tokens)'),
752
+ trailing: SizedBox(
753
+ width: 200,
754
+ child: Row(
755
+ mainAxisAlignment: MainAxisAlignment.end,
756
+ children: [
757
+ IconButton(
758
+ onPressed: _simLoopDelayInSec > 0 ? () {
759
+ setState(() {
760
+ _simLoopDelayInSec = (_simLoopDelayInSec - 1).clamp(0, 300);
761
+ });
762
+ _settingsService.setSimLoopDelayInSec(_simLoopDelayInSec);
763
+ } : null,
764
+ icon: const Icon(Icons.remove),
765
+ ),
766
+ SizedBox(
767
+ width: 50,
768
+ child: Text(
769
+ '${_simLoopDelayInSec}s',
770
+ textAlign: TextAlign.center,
771
+ style: const TextStyle(fontWeight: FontWeight.bold),
772
+ ),
773
+ ),
774
+ IconButton(
775
+ onPressed: _simLoopDelayInSec < 300 ? () {
776
+ setState(() {
777
+ _simLoopDelayInSec = (_simLoopDelayInSec + 1).clamp(0, 300);
778
+ });
779
+ _settingsService.setSimLoopDelayInSec(_simLoopDelayInSec);
780
+ } : null,
781
+ icon: const Icon(Icons.add),
782
+ ),
783
+ ],
784
+ ),
785
+ ),
786
+ ),
787
+ ],
788
  const SizedBox(height: 16),
789
  // Clear device connections button
790
  ListTile(
lib/services/chat_service.dart CHANGED
@@ -146,8 +146,11 @@ class ChatService {
146
  }
147
 
148
  void _handleChatMessage(ChatMessage message) {
 
 
149
  // Only add messages if they're for the current room
150
  if (message.videoId == _currentRoomId) {
 
151
  _chatController.add(message);
152
 
153
  // Store this message in the recent messages for this room
 
146
  }
147
 
148
  void _handleChatMessage(ChatMessage message) {
149
+ debugPrint('CHAT_DEBUG: ChatService received message - videoId: ${message.videoId}, currentRoom: $_currentRoomId, content: "${message.content}"');
150
+
151
  // Only add messages if they're for the current room
152
  if (message.videoId == _currentRoomId) {
153
+ debugPrint('CHAT_DEBUG: Message matches current room, forwarding to controller');
154
  _chatController.add(message);
155
 
156
  // Store this message in the recent messages for this room
lib/services/clip_queue/clip_queue_manager.dart CHANGED
@@ -40,11 +40,7 @@ class ClipQueueManager {
40
  /// Timer for checking the buffer state
41
  Timer? _bufferCheckTimer;
42
 
43
- /// Timer for evolving the description
44
- Timer? _descriptionEvolutionTimer;
45
 
46
- /// Last time the description was evolved
47
- DateTime _lastDescriptionEvolutionTime = DateTime.now();
48
 
49
  /// Whether the manager is disposed
50
  bool _isDisposed = false;
@@ -88,20 +84,25 @@ class ClipQueueManager {
88
  );
89
 
90
  // Start listening to chat messages
 
91
  final chatService = ChatService();
92
  chatService.initialize().then((_) {
 
93
  chatService.joinRoom(videoId).then((_) {
 
94
  chatService.chatStream.listen(_addChatMessage);
95
  }).catchError((e) {
96
- debugPrint('ClipQueueManager: Error joining chat room: $e');
97
  });
98
  }).catchError((e) {
99
- debugPrint('ClipQueueManager: Error initializing chat service: $e');
100
  });
101
  }
102
 
103
  /// Add a chat message to the recent messages list
104
  void _addChatMessage(ChatMessage message) {
 
 
105
  if (message.videoId == videoId) {
106
  _recentChatMessages.add(message);
107
  // Keep only the 5 most recent messages
@@ -109,6 +110,9 @@ class ClipQueueManager {
109
  _recentChatMessages.removeAt(0);
110
  }
111
  ClipQueueConstants.logEvent('Added chat message: ${message.content.substring(0, min(20, message.content.length))}...');
112
  }
113
  }
114
 
@@ -156,9 +160,8 @@ class ClipQueueManager {
156
  );
157
  _clipBuffer.clear();
158
 
159
- // Reset evolution counter and last evolution time
160
  _evolutionCounter = 0;
161
- _lastDescriptionEvolutionTime = DateTime.now();
162
 
163
  // Set initial orientation
164
  _currentOrientation = orientation ?? getOrientationFromDimensions(
@@ -217,8 +220,8 @@ class ClipQueueManager {
217
 
218
  /// Start the simulation timer
219
  void _startDescriptionEvolution() {
220
- // Cancel any existing timer
221
- _descriptionEvolutionTimer?.cancel();
222
 
223
  // Check if simulation is enabled globally in config and from user settings
224
  final settingsService = SettingsService();
@@ -231,61 +234,13 @@ class ClipQueueManager {
231
  return;
232
  }
233
 
234
- if (Configuration.instance.simLoopFrequencyInSec <= 0) {
235
- debugPrint('SIMULATION: Disabled (frequency is 0)');
236
- ClipQueueConstants.logEvent('Simulation disabled (frequency is 0)');
237
- return;
238
- }
239
 
240
- debugPrint('SIMULATION: Starting simulation timer with settings: enableSimLoop=${Configuration.instance.enableSimLoop}, userSetting=${settingsService.enableSimulation}, frequency=${Configuration.instance.simLoopFrequencyInSec}s');
241
 
242
- // Adaptive check interval - less frequent checks to reduce overhead
243
- final checkInterval = max(3, Configuration.instance.simLoopFrequencyInSec ~/ 3);
244
 
245
- ClipQueueConstants.logEvent('Starting simulation with check interval of $checkInterval seconds');
246
-
247
- // Check periodically if it's time to simulate the video
248
- _descriptionEvolutionTimer = Timer.periodic(
249
- Duration(seconds: checkInterval),
250
- (timer) async {
251
- // debugPrint('SIMULATION: Timer check triggered');
252
- if (_isDisposed) {
253
- debugPrint('SIMULATION: Skipping because manager is disposed');
254
- return;
255
- }
256
-
257
- // Skip if simulation is paused (due to video playback being paused)
258
- if (_isSimulationPaused) {
259
- // debugPrint('SIMULATION: Skipping because it is paused');
260
- ClipQueueConstants.logEvent('Skipping simulation because it is paused');
261
- return;
262
- }
263
-
264
- // We previously delayed simulation if clips were being generated,
265
- // but since clip generation is constant, we'll now run them in parallel
266
- final isGenerating = _activeGenerations.isNotEmpty;
267
- if (isGenerating) {
268
- // debugPrint('SIMULATION: Continuing with simulation despite active generations');
269
- ClipQueueConstants.logEvent('Running simulation in parallel with active generations');
270
- // We no longer return early here
271
- }
272
-
273
- // Calculate time since last simulation
274
- final now = DateTime.now();
275
- final duration = now.difference(_lastDescriptionEvolutionTime);
276
- // debugPrint('SIMULATION: Time since last simulation: ${duration.inSeconds}s (frequency: ${Configuration.instance.simLoopFrequencyInSec}s)');
277
-
278
- // If we've waited long enough, simulate the video
279
- if (duration.inSeconds >= Configuration.instance.simLoopFrequencyInSec) {
280
- debugPrint('SIMULATION: Triggering simulation after ${duration.inSeconds} seconds');
281
- ClipQueueConstants.logEvent('Triggering simulation after ${duration.inSeconds} seconds');
282
- await _evolveDescription();
283
- _lastDescriptionEvolutionTime = now;
284
- } else {
285
- // debugPrint('SIMULATION: Not enough time elapsed since last simulation');
286
- }
287
- },
288
- );
289
  ClipQueueConstants.logEvent('Started simulation timer');
290
  }
291
 
@@ -304,11 +259,19 @@ class ClipQueueManager {
304
 
305
  // Function to get chat message string
306
  String getChatMessagesString() {
307
- if (_recentChatMessages.isEmpty) return '';
308
 
309
- return _recentChatMessages.map((msg) =>
310
  "${msg.username}: ${msg.content}"
311
  ).join("\n");
312
  }
313
 
314
  while (retryCount <= maxRetries) {
@@ -377,6 +340,49 @@ class ClipQueueManager {
377
  }
378
  }
379
 
380
  /// Mark a specific clip as played
381
  void markClipAsPlayed(String clipId) {
382
  _logger.logStateChange(
@@ -638,11 +644,8 @@ class ClipQueueManager {
638
  : 'Simulation resumed (video playback resumed)'
639
  );
640
 
641
- // If we're resuming after a pause, update the last evolution time
642
- // to avoid immediate evolution after resuming
643
- if (!isPaused) {
644
- _lastDescriptionEvolutionTime = DateTime.now();
645
- }
646
  }
647
 
648
  /// Print the current state of the queue
@@ -671,7 +674,6 @@ class ClipQueueManager {
671
 
672
  // Cancel all timers first
673
  _bufferCheckTimer?.cancel();
674
- _descriptionEvolutionTimer?.cancel();
675
 
676
  // Complete any pending generation completers
677
  for (var clip in _clipBuffer) {
 
40
  /// Timer for checking the buffer state
41
  Timer? _bufferCheckTimer;
42
 
 
 
43
 
 
 
44
 
45
  /// Whether the manager is disposed
46
  bool _isDisposed = false;
 
84
  );
85
 
86
  // Start listening to chat messages
87
+ debugPrint('CHAT_DEBUG: ClipQueueManager initializing chat service for video $videoId');
88
  final chatService = ChatService();
89
  chatService.initialize().then((_) {
90
+ debugPrint('CHAT_DEBUG: ChatService initialized, joining room $videoId');
91
  chatService.joinRoom(videoId).then((_) {
92
+ debugPrint('CHAT_DEBUG: Joined chat room, setting up message listener');
93
  chatService.chatStream.listen(_addChatMessage);
94
  }).catchError((e) {
95
+ debugPrint('CHAT_DEBUG: Error joining chat room: $e');
96
  });
97
  }).catchError((e) {
98
+ debugPrint('CHAT_DEBUG: Error initializing chat service: $e');
99
  });
100
  }
101
 
102
  /// Add a chat message to the recent messages list
103
  void _addChatMessage(ChatMessage message) {
104
+ debugPrint('CHAT_DEBUG: ClipQueueManager received message - videoId: ${message.videoId}, expected: $videoId, content: "${message.content}"');
105
+
106
  if (message.videoId == videoId) {
107
  _recentChatMessages.add(message);
108
  // Keep only the 5 most recent messages
 
110
  _recentChatMessages.removeAt(0);
111
  }
112
  ClipQueueConstants.logEvent('Added chat message: ${message.content.substring(0, min(20, message.content.length))}...');
113
+ debugPrint('CHAT_DEBUG: Added message to queue manager, total messages: ${_recentChatMessages.length}');
114
+ } else {
115
+ debugPrint('CHAT_DEBUG: Message videoId mismatch - ignoring message');
116
  }
117
  }
118
 
 
160
  );
161
  _clipBuffer.clear();
162
 
163
+ // Reset evolution counter
164
  _evolutionCounter = 0;
 
165
 
166
  // Set initial orientation
167
  _currentOrientation = orientation ?? getOrientationFromDimensions(
 
220
 
221
  /// Start the simulation timer
222
  void _startDescriptionEvolution() {
223
+ // Cancel any existing simulation loop by setting the disposed flag
224
+ // The _runSimulationLoop method will check _isDisposed and exit gracefully
225
 
226
  // Check if simulation is enabled globally in config and from user settings
227
  final settingsService = SettingsService();
 
234
  return;
235
  }
236
 
 
 
 
 
 
237
 
238
+ debugPrint('SIMULATION: Starting simulation with settings: enableSimLoop=${Configuration.instance.enableSimLoop}, userSetting=${settingsService.enableSimulation}, delay=${settingsService.simLoopDelayInSec}s');
239
 
240
+ ClipQueueConstants.logEvent('Starting simulation loop with delay of ${settingsService.simLoopDelayInSec} seconds');
 
241
 
242
+ // Start the simulation loop immediately
243
+ _runSimulationLoop();
244
  ClipQueueConstants.logEvent('Started simulation timer');
245
  }
246
 
 
259
 
260
  // Function to get chat message string
261
  String getChatMessagesString() {
262
+ debugPrint('CHAT_DEBUG: Getting chat messages for simulation - count: ${_recentChatMessages.length}');
263
+
264
+ if (_recentChatMessages.isEmpty) {
265
+ debugPrint('CHAT_DEBUG: No chat messages available for simulation');
266
+ return '';
267
+ }
268
 
269
+ final messagesString = _recentChatMessages.map((msg) =>
270
  "${msg.username}: ${msg.content}"
271
  ).join("\n");
272
+
273
+ debugPrint('CHAT_DEBUG: Chat messages for simulation: $messagesString');
274
+ return messagesString;
275
  }
276
 
277
  while (retryCount <= maxRetries) {
 
340
  }
341
  }
342
 
343
+ /// Run the simulation loop with delay-based approach
344
+ Future<void> _runSimulationLoop() async {
345
+ while (!_isDisposed) {
346
+ try {
347
+ // Skip if simulation is paused (due to video playback being paused)
348
+ if (_isSimulationPaused) {
349
+ await Future.delayed(const Duration(seconds: 1));
350
+ continue;
351
+ }
352
+
353
+ // Run the simulation
354
+ debugPrint('SIMULATION: Starting simulation iteration');
355
+ ClipQueueConstants.logEvent('Starting simulation iteration');
356
+
357
+ final simulationStart = DateTime.now();
358
+ await _evolveDescription();
359
+ final simulationEnd = DateTime.now();
360
+ final simulationDuration = simulationEnd.difference(simulationStart);
361
+
362
+ debugPrint('SIMULATION: Completed simulation in ${simulationDuration.inMilliseconds}ms');
363
+ ClipQueueConstants.logEvent('Completed simulation in ${simulationDuration.inMilliseconds}ms');
364
+
365
+ // Add the user-configured delay after simulation
366
+ final settingsService = SettingsService();
367
+ final delaySeconds = settingsService.simLoopDelayInSec;
368
+ debugPrint('SIMULATION: Waiting ${delaySeconds}s before next simulation');
369
+ ClipQueueConstants.logEvent('Waiting ${delaySeconds}s before next simulation');
370
+
371
+ await Future.delayed(Duration(seconds: delaySeconds));
372
+
373
+ } catch (e) {
374
+ debugPrint('SIMULATION: Error in simulation loop: $e');
375
+ ClipQueueConstants.logEvent('Error in simulation loop: $e');
376
+
377
+ // Wait a bit before retrying to avoid tight error loops
378
+ await Future.delayed(const Duration(seconds: 5));
379
+ }
380
+ }
381
+
382
+ debugPrint('SIMULATION: Simulation loop ended');
383
+ ClipQueueConstants.logEvent('Simulation loop ended');
384
+ }
385
+
386
  /// Mark a specific clip as played
387
  void markClipAsPlayed(String clipId) {
388
  _logger.logStateChange(
 
644
  : 'Simulation resumed (video playback resumed)'
645
  );
646
 
647
+ // Note: With the delay-based approach, simulation timing is handled
648
+ // internally by the _runSimulationLoop method
649
  }
650
 
651
  /// Print the current state of the queue
 
674
 
675
  // Cancel all timers first
676
  _bufferCheckTimer?.cancel();
 
677
 
678
  // Complete any pending generation completers
679
  for (var clip in _clipBuffer) {
lib/services/settings_service.dart CHANGED
@@ -9,6 +9,8 @@ class SettingsService {
9
  static const String _negativePromptKey = 'negative_video_prompt';
10
  static const String _showSceneDebugInfoKey = 'show_scene_debug_info';
11
  static const String _enableSimulationKey = 'enable_simulation';
 
 
12
  static const String _llmProviderKey = 'llm_provider';
13
  static const String _llmModelKey = 'llm_model';
14
  static const String _llmApiKeyKey = 'llm_api_key';
@@ -61,6 +63,20 @@ class SettingsService {
61
  _settingsController.add(null);
62
  }
63
 
  String get llmProvider => _prefs.getString(_llmProviderKey) ?? 'built-in';
65
 
66
  Future<void> setLlmProvider(String provider) async {
 
9
  static const String _negativePromptKey = 'negative_video_prompt';
10
  static const String _showSceneDebugInfoKey = 'show_scene_debug_info';
11
  static const String _enableSimulationKey = 'enable_simulation';
12
+ static const String _simLoopDelayKey = 'sim_loop_delay_in_sec';
13
+ static const String _gameMasterPromptKey = 'game_master_prompt';
14
  static const String _llmProviderKey = 'llm_provider';
15
  static const String _llmModelKey = 'llm_model';
16
  static const String _llmApiKeyKey = 'llm_api_key';
 
63
  _settingsController.add(null);
64
  }
65
 
66
+ int get simLoopDelayInSec => _prefs.getInt(_simLoopDelayKey) ?? 5;
67
+
68
+ Future<void> setSimLoopDelayInSec(int value) async {
69
+ await _prefs.setInt(_simLoopDelayKey, value);
70
+ _settingsController.add(null);
71
+ }
72
+
73
+ String get gameMasterPrompt => _prefs.getString(_gameMasterPromptKey) ?? '';
74
+
75
+ Future<void> setGameMasterPrompt(String value) async {
76
+ await _prefs.setString(_gameMasterPromptKey, value);
77
+ _settingsController.add(null);
78
+ }
79
+
80
  String get llmProvider => _prefs.getString(_llmProviderKey) ?? 'built-in';
81
 
82
  Future<void> setLlmProvider(String provider) async {
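The two new preferences persist through SharedPreferences like the existing keys. A minimal sketch of reading and writing them via the members added above, assuming the service has already been initialized; the values shown are illustrative:

    // Sketch only: exercises simLoopDelayInSec and gameMasterPrompt from this commit.
    Future<void> configureSimulation() async {
      final settings = SettingsService();
      await settings.setSimLoopDelayInSec(10); // the settings UI clamps this to 0-300
      await settings.setGameMasterPrompt('Keep things fun and kid-friendly.');
      debugPrint('delay=${settings.simLoopDelayInSec}s, '
          'gameMaster="${settings.gameMasterPrompt}"');
    }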
lib/services/websocket_api_service.dart CHANGED
@@ -15,6 +15,7 @@ import 'package:web_socket_channel/web_socket_channel.dart';
15
  import '../models/search_state.dart';
16
  import '../models/video_result.dart';
17
  import '../models/video_orientation.dart';
 
18
 
19
  class WebSocketRequest {
20
  final String requestId;
@@ -49,6 +50,9 @@ class WebSocketApiService {
49
  factory WebSocketApiService() => _instance;
50
  WebSocketApiService._internal();
51
 
52
  // Dynamically build WebSocket URL based on current host in web platform
53
  // or use environment variable/production URL/localhost for development on other platforms
54
  static String get _wsUrl {
@@ -60,13 +64,13 @@ class WebSocketApiService {
60
  // For localhost, explicitly include port 8080
61
  if (location.host == 'localhost' || location.host.startsWith('localhost:')) {
62
  final url = '$protocol://localhost:8080/ws';
63
- debugPrint('WebSocketApiService: Using localhost:8080 WebSocket URL: $url');
64
  return url;
65
  }
66
 
67
  // For other hosts, include the original port number in the URL
68
  final url = '$protocol://${location.host}/ws';
69
- debugPrint('WebSocketApiService: Using dynamic WebSocket URL: $url');
70
  return url;
71
  } else {
72
  // First try to get WebSocket URL from environment variable (highest priority)
@@ -143,7 +147,7 @@ class WebSocketApiService {
143
  if (_initialized) return;
144
 
145
  try {
146
- debugPrint('WebSocketApiService: Initializing and connecting...');
147
 
148
  // Add page unload handler for web platform
149
  if (kIsWeb) {
@@ -215,7 +219,7 @@ class WebSocketApiService {
215
  if (response['success'] == true && response['user_role'] != null) {
216
  _userRole = response['user_role'] as String;
217
  _userRoleController.add(_userRole);
218
- debugPrint('WebSocketApiService: User role set to $_userRole');
219
 
220
  // Now that we know the role, check device connection limit for non-anonymous users
221
  if (kIsWeb && _userRole != 'anon') {
@@ -892,6 +896,7 @@ class WebSocketApiService {
892
  'model': llmModel,
893
  'api_key': llmApiKey,
894
  'hf_token': hfApiKey,
 
895
  },
896
  },
897
  ),
@@ -1030,7 +1035,7 @@ class WebSocketApiService {
1030
  final action = data['action'] as String?;
1031
  final requestId = data['requestId'] as String?;
1032
 
1033
- debugPrint('WebSocketApiService: Received message for action: $action, requestId: $requestId');
1034
 
1035
  // Update user role if present in response (from heartbeat or get_user_role)
1036
  if (data['user_role'] != null) {
@@ -1233,13 +1238,15 @@ class WebSocketApiService {
1233
 
1234
  try {
1235
  final requestData = request.toJson();
1236
- debugPrint('WebSocketApiService: Sending request ${request.requestId} (${request.action}): ${json.encode(requestData)}');
 
 
1237
  _channel!.sink.add(json.encode(requestData));
1238
 
1239
  final response = await completer.future.timeout(
1240
  timeout ?? const Duration(seconds: 10),
1241
  onTimeout: () {
1242
- debugPrint('WebSocketApiService: Request ${request.requestId} timed out');
1243
  _cleanup(request.requestId);
1244
  throw TimeoutException('Request timeout');
1245
  },
@@ -1280,6 +1287,7 @@ class WebSocketApiService {
1280
  'provider': llmProvider,
1281
  'model': llmModel,
1282
  'api_key': llmApiKey,
 
1283
  },
1284
  },
1285
  ),
@@ -1308,7 +1316,7 @@ class WebSocketApiService {
1308
  int height = 320,
1309
  int width = 512,
1310
  int seed = 0,
1311
- Duration timeout = const Duration(seconds: 12), // we keep things super tight, as normally a video only takes 2~3s to generate
1312
  VideoOrientation orientation = VideoOrientation.LANDSCAPE,
1313
  }) async {
1314
  final settings = SettingsService();
@@ -1362,6 +1370,7 @@ class WebSocketApiService {
1362
  'provider': llmProvider,
1363
  'model': llmModel,
1364
  'api_key': llmApiKey,
 
1365
  },
1366
  },
1367
  ),
@@ -1395,6 +1404,10 @@ class WebSocketApiService {
1395
  }
1396
 
1397
  debugPrint('WebSocketApiService: Sending simulation request for video $videoId (evolution #$evolutionCount)');
1398
 
1399
  try {
1400
  // If chat messages are provided directly, use them; otherwise the default empty string is used
@@ -1444,6 +1457,7 @@ class WebSocketApiService {
1444
  'provider': llmProvider,
1445
  'model': llmModel,
1446
  'api_key': llmApiKey,
 
1447
  },
1448
  },
1449
  ),
 
15
  import '../models/search_state.dart';
16
  import '../models/video_result.dart';
17
  import '../models/video_orientation.dart';
18
+ import '../utils/colored_logger.dart';
19
 
20
  class WebSocketRequest {
21
  final String requestId;
 
50
  factory WebSocketApiService() => _instance;
51
  WebSocketApiService._internal();
52
 
53
+ // Colored logger
54
+ final _log = ColoredLogger.get('WebSocketApiService');
55
+
56
  // Dynamically build WebSocket URL based on current host in web platform
57
  // or use environment variable/production URL/localhost for development on other platforms
58
  static String get _wsUrl {
 
64
  // For localhost, explicitly include port 8080
65
  if (location.host == 'localhost' || location.host.startsWith('localhost:')) {
66
  final url = '$protocol://localhost:8080/ws';
67
+ ColoredLogger.get('WebSocketApiService').network('Using localhost:8080 WebSocket URL: $url');
68
  return url;
69
  }
70
 
71
  // For other hosts, include the original port number in the URL
72
  final url = '$protocol://${location.host}/ws';
73
+ ColoredLogger.get('WebSocketApiService').network('Using dynamic WebSocket URL: $url');
74
  return url;
75
  } else {
76
  // First try to get WebSocket URL from environment variable (highest priority)
 
147
  if (_initialized) return;
148
 
149
  try {
150
+ _log.info('Initializing and connecting...');
151
 
152
  // Add page unload handler for web platform
153
  if (kIsWeb) {
 
219
  if (response['success'] == true && response['user_role'] != null) {
220
  _userRole = response['user_role'] as String;
221
  _userRoleController.add(_userRole);
222
+ _log.success('User role set to $_userRole');
223
 
224
  // Now that we know the role, check device connection limit for non-anonymous users
225
  if (kIsWeb && _userRole != 'anon') {
 
896
  'model': llmModel,
897
  'api_key': llmApiKey,
898
  'hf_token': hfApiKey,
899
+ 'game_master_prompt': settings.gameMasterPrompt,
900
  },
901
  },
902
  ),
 
1035
  final action = data['action'] as String?;
1036
  final requestId = data['requestId'] as String?;
1037
 
1038
+ _log.websocket('Received message for action: $action, requestId: [$requestId]');
1039
 
1040
  // Update user role if present in response (from heartbeat or get_user_role)
1041
  if (data['user_role'] != null) {
 
1238
 
1239
  try {
1240
  final requestData = request.toJson();
1241
+ _log.websocket('Sending request [${request.requestId}] (${request.action})', {
1242
+ 'data': json.encode(requestData)
1243
+ });
1244
  _channel!.sink.add(json.encode(requestData));
1245
 
1246
  final response = await completer.future.timeout(
1247
  timeout ?? const Duration(seconds: 10),
1248
  onTimeout: () {
1249
+ _log.error('Request [${request.requestId}] timed out');
1250
  _cleanup(request.requestId);
1251
  throw TimeoutException('Request timeout');
1252
  },
 
1287
  'provider': llmProvider,
1288
  'model': llmModel,
1289
  'api_key': llmApiKey,
1290
+ 'game_master_prompt': settings.gameMasterPrompt,
1291
  },
1292
  },
1293
  ),
 
1316
  int height = 320,
1317
  int width = 512,
1318
  int seed = 0,
1319
+ Duration timeout = const Duration(seconds: 8), // we keep things super tight to fail quickly, as normally a video only takes 2 seconds to generate (including the transatlantic round trip)
1320
  VideoOrientation orientation = VideoOrientation.LANDSCAPE,
1321
  }) async {
1322
  final settings = SettingsService();
 
1370
  'provider': llmProvider,
1371
  'model': llmModel,
1372
  'api_key': llmApiKey,
1373
+ 'game_master_prompt': settings.gameMasterPrompt,
1374
  },
1375
  },
1376
  ),
 
1404
  }
1405
 
1406
  debugPrint('WebSocketApiService: Sending simulation request for video $videoId (evolution #$evolutionCount)');
1407
+ debugPrint('CHAT_DEBUG: WebSocket simulate() called with chatMessages length: ${chatMessages.length}');
1408
+ if (chatMessages.isNotEmpty) {
1409
+ debugPrint('CHAT_DEBUG: Chat messages content: $chatMessages');
1410
+ }
1411
 
1412
  try {
1413
  // If chat messages are provided directly, use them; otherwise the default empty string is used
 
1457
  'provider': llmProvider,
1458
  'model': llmModel,
1459
  'api_key': llmApiKey,
1460
+ 'game_master_prompt': settings.gameMasterPrompt,
1461
  },
1462
  },
1463
  ),
lib/utils/colored_logger.dart ADDED
@@ -0,0 +1,270 @@
1
+ import 'dart:developer' as developer;
2
+ import 'package:flutter/foundation.dart';
3
+
4
+ /// ANSI color codes for terminal output
5
+ class AnsiColors {
6
+ static const String reset = '\x1B[0m';
7
+ static const String bold = '\x1B[1m';
8
+ static const String dim = '\x1B[2m';
9
+ static const String italic = '\x1B[3m';
10
+ static const String underline = '\x1B[4m';
11
+
12
+ // Foreground colors
13
+ static const String black = '\x1B[30m';
14
+ static const String red = '\x1B[31m';
15
+ static const String green = '\x1B[32m';
16
+ static const String yellow = '\x1B[33m';
17
+ static const String blue = '\x1B[34m';
18
+ static const String magenta = '\x1B[35m';
19
+ static const String cyan = '\x1B[36m';
20
+ static const String white = '\x1B[37m';
21
+
22
+ // Bright foreground colors
23
+ static const String brightBlack = '\x1B[90m';
24
+ static const String brightRed = '\x1B[91m';
25
+ static const String brightGreen = '\x1B[92m';
26
+ static const String brightYellow = '\x1B[93m';
27
+ static const String brightBlue = '\x1B[94m';
28
+ static const String brightMagenta = '\x1B[95m';
29
+ static const String brightCyan = '\x1B[96m';
30
+ static const String brightWhite = '\x1B[97m';
31
+
32
+ // Background colors
33
+ static const String bgBlack = '\x1B[40m';
34
+ static const String bgRed = '\x1B[41m';
35
+ static const String bgGreen = '\x1B[42m';
36
+ static const String bgYellow = '\x1B[43m';
37
+ static const String bgBlue = '\x1B[44m';
38
+ static const String bgMagenta = '\x1B[45m';
39
+ static const String bgCyan = '\x1B[46m';
40
+ static const String bgWhite = '\x1B[47m';
41
+ }
42
+
43
+ /// Log levels with associated colors and emojis
44
+ enum LogLevel {
45
+ debug(AnsiColors.brightBlack, '🔍', 'DEBUG'),
46
+ info(AnsiColors.brightCyan, '💡', 'INFO'),
47
+ warning(AnsiColors.brightYellow, '⚠️', 'WARN'),
48
+ error(AnsiColors.brightRed, '❌', 'ERROR'),
49
+ success(AnsiColors.brightGreen, '✅', 'SUCCESS'),
50
+ network(AnsiColors.brightMagenta, '🌐', 'NET'),
51
+ websocket(AnsiColors.cyan, '🔌', 'WS'),
52
+ video(AnsiColors.brightBlue, '🎬', 'VIDEO'),
53
+ chat(AnsiColors.green, '💬', 'CHAT'),
54
+ search(AnsiColors.yellow, '🔍', 'SEARCH');
55
+
56
+ const LogLevel(this.color, this.emoji, this.label);
57
+
58
+ final String color;
59
+ final String emoji;
60
+ final String label;
61
+ }
62
+
63
+ /// Beautiful colored logger for Flutter applications
64
+ class ColoredLogger {
65
+ final String _className;
66
+
67
+ ColoredLogger(this._className);
68
+
69
+ /// Create a logger for a specific class
70
+ static ColoredLogger get(String className) {
71
+ return ColoredLogger(className);
72
+ }
73
+
74
+ /// Debug level logging - for detailed debugging info
75
+ void debug(String message, [Map<String, dynamic>? data]) {
76
+ _log(LogLevel.debug, message, data);
77
+ }
78
+
79
+ /// Info level logging - for general information
80
+ void info(String message, [Map<String, dynamic>? data]) {
81
+ _log(LogLevel.info, message, data);
82
+ }
83
+
84
+ /// Warning level logging - for potential issues
85
+ void warning(String message, [Map<String, dynamic>? data]) {
86
+ _log(LogLevel.warning, message, data);
87
+ }
88
+
89
+ /// Error level logging - for errors and exceptions
90
+ void error(String message, [Map<String, dynamic>? data]) {
91
+ _log(LogLevel.error, message, data);
92
+ }
93
+
94
+ /// Success level logging - for successful operations
95
+ void success(String message, [Map<String, dynamic>? data]) {
96
+ _log(LogLevel.success, message, data);
97
+ }
98
+
99
+ /// Network level logging - for network operations
100
+ void network(String message, [Map<String, dynamic>? data]) {
101
+ _log(LogLevel.network, message, data);
102
+ }
103
+
104
+ /// WebSocket level logging - for WebSocket operations
105
+ void websocket(String message, [Map<String, dynamic>? data]) {
106
+ _log(LogLevel.websocket, message, data);
107
+ }
108
+
109
+ /// Video level logging - for video generation operations
110
+ void video(String message, [Map<String, dynamic>? data]) {
111
+ _log(LogLevel.video, message, data);
112
+ }
113
+
114
+ /// Chat level logging - for chat operations
115
+ void chat(String message, [Map<String, dynamic>? data]) {
116
+ _log(LogLevel.chat, message, data);
117
+ }
118
+
119
+ /// Search level logging - for search operations
120
+ void search(String message, [Map<String, dynamic>? data]) {
121
+ _log(LogLevel.search, message, data);
122
+ }
123
+
124
+ void _log(LogLevel level, String message, Map<String, dynamic>? data) {
125
+ if (!kDebugMode) return; // Only log in debug mode
126
+
127
+ final timestamp = DateTime.now();
128
+ final timeStr = '${timestamp.hour.toString().padLeft(2, '0')}:'
129
+ '${timestamp.minute.toString().padLeft(2, '0')}:'
130
+ '${timestamp.second.toString().padLeft(2, '0')}.'
131
+ '${timestamp.millisecond.toString().padLeft(3, '0')}';
132
+
133
+ // Format the main log message with colors
134
+ final coloredMessage = _colorizeMessage(message);
135
+
136
+ // Build the log line
137
+ final logLine = StringBuffer();
138
+
139
+ // Timestamp (dim)
140
+ logLine.write('${AnsiColors.dim}$timeStr${AnsiColors.reset} ');
141
+
142
+ // Level with color and emoji
143
+ logLine.write('${level.color}${level.emoji} ${level.label.padRight(7)}${AnsiColors.reset} ');
144
+
145
+ // Class name (bright black)
146
+ logLine.write('${AnsiColors.brightBlack}[$_className]${AnsiColors.reset} ');
147
+
148
+ // Message
149
+ logLine.write(coloredMessage);
150
+
151
+ // Add data if provided
152
+ if (data != null && data.isNotEmpty) {
153
+ logLine.write(' ${AnsiColors.dim}${_formatData(data)}${AnsiColors.reset}');
154
+ }
155
+
156
+ // Use developer.log for better IDE integration
157
+ developer.log(
158
+ logLine.toString(),
159
+ name: _className,
160
+ level: _getLevelValue(level),
161
+ );
162
+ }
163
+
164
+ String _colorizeMessage(String message) {
165
+ String result = message;
166
+
167
+ // Highlight request IDs in brackets
168
+ result = result.replaceAllMapped(
169
+ RegExp(r'\[([a-zA-Z0-9-]+)\]'),
170
+ (match) => '${AnsiColors.brightGreen}[${match.group(1)}]${AnsiColors.reset}',
171
+ );
172
+
173
+ // Highlight user IDs
174
+ result = result.replaceAllMapped(
175
+ RegExp(r'\buser ([a-zA-Z0-9-]+)'),
176
+ (match) => 'user ${AnsiColors.brightBlue}${match.group(1)}${AnsiColors.reset}',
177
+ );
178
+
179
+ // Highlight actions
180
+ result = result.replaceAllMapped(
181
+ RegExp(r'\b(generate_video|search|simulate|join_chat|leave_chat|chat_message|connect|disconnect)\b'),
182
+ (match) => '${AnsiColors.brightYellow}${match.group(1)}${AnsiColors.reset}',
183
+ );
184
+
185
+ // Highlight status keywords
186
+ result = result.replaceAllMapped(
187
+ RegExp(r'\b(success|successful|completed|connected|ready|ok)\b', caseSensitive: false),
188
+ (match) => '${AnsiColors.brightGreen}${match.group(1)}${AnsiColors.reset}',
189
+ );
190
+
191
+ result = result.replaceAllMapped(
192
+ RegExp(r'\b(error|failed|timeout|exception|crash)\b', caseSensitive: false),
193
+ (match) => '${AnsiColors.brightRed}${match.group(1)}${AnsiColors.reset}',
194
+ );
195
+
196
+ result = result.replaceAllMapped(
197
+ RegExp(r'\b(warning|retry|reconnect|fallback)\b', caseSensitive: false),
198
+ (match) => '${AnsiColors.brightYellow}${match.group(1)}${AnsiColors.reset}',
199
+ );
200
+
201
+ // Highlight numbers with units
202
+ result = result.replaceAllMapped(
203
+ RegExp(r'\b(\d+\.?\d*)(ms|s|MB|KB|bytes|chars|fps)?\b'),
204
+ (match) => '${AnsiColors.brightMagenta}${match.group(1)}${AnsiColors.cyan}${match.group(2) ?? ''}${AnsiColors.reset}',
205
+ );
206
+
207
+ // Highlight URLs
208
+ result = result.replaceAllMapped(
209
+ RegExp(r'https?://[^\s]+'),
210
+ (match) => '${AnsiColors.underline}${AnsiColors.brightCyan}${match.group(0)}${AnsiColors.reset}',
211
+ );
212
+
213
+ // Highlight JSON-like structures
214
+ result = result.replaceAllMapped(
215
+ RegExp(r'\{[^}]*\}'),
216
+ (match) => '${AnsiColors.dim}${match.group(0)}${AnsiColors.reset}',
217
+ );
218
+
219
+ // Highlight strings in quotes
220
+ result = result.replaceAllMapped(
221
+ RegExp(r'"([^"]*)"'),
222
+ (match) => '"${AnsiColors.green}${match.group(1)}${AnsiColors.reset}"',
223
+ );
224
+
225
+ return result;
226
+ }
227
+
228
+ String _formatData(Map<String, dynamic> data) {
229
+ final entries = data.entries.map((e) {
230
+ final key = e.key;
231
+ final value = e.value.toString();
232
+ return '${AnsiColors.cyan}$key${AnsiColors.reset}=${AnsiColors.brightWhite}$value${AnsiColors.reset}';
233
+ }).join(' ');
234
+
235
+ return '{$entries}';
236
+ }
237
+
238
+ int _getLevelValue(LogLevel level) {
239
+ switch (level) {
240
+ case LogLevel.debug:
241
+ return 500;
242
+ case LogLevel.info:
243
+ return 800;
244
+ case LogLevel.warning:
245
+ return 900;
246
+ case LogLevel.error:
247
+ return 1000;
248
+ case LogLevel.success:
249
+ return 800;
250
+ case LogLevel.network:
251
+ return 700;
252
+ case LogLevel.websocket:
253
+ return 700;
254
+ case LogLevel.video:
255
+ return 700;
256
+ case LogLevel.chat:
257
+ return 700;
258
+ case LogLevel.search:
259
+ return 700;
260
+ }
261
+ }
262
+ }
263
+
264
+ /// Extension methods for easy logging
265
+ extension ColoredLogging on Object {
266
+ ColoredLogger get log => ColoredLogger.get(runtimeType.toString());
267
+ }
268
+
269
+ /// Global logger instance for quick access
270
+ final appLog = ColoredLogger.get('App');
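A short usage sketch of the new logger, using only the API defined above; the class name, request id, and payload map are illustrative:

    // Sketch only: per-class logger plus the extension getter added above.
    final wsLog = ColoredLogger.get('WebSocketApiService');
    wsLog.info('Initializing and connecting...');
    wsLog.websocket('Sending request [req-42] (generate_video)', {'bytes': 2048});
    wsLog.error('Request [req-42] timed out');
    // From inside any class, the ColoredLogging extension exposes `log`:
    //   log.success('User role set to admin');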
lib/widgets/chat_widget.dart CHANGED
@@ -251,6 +251,7 @@ class _ChatWidgetState extends State<ChatWidget> {
251
  style: const TextStyle(color: TikSlopColors.onSurface),
252
  maxLength: 255,
253
  maxLines: 1,
 
254
  onChanged: (value) {
255
  // Enforce the character limit by trimming excess characters
256
  if (value.length > 255) {
@@ -261,7 +262,7 @@ class _ChatWidgetState extends State<ChatWidget> {
261
  }
262
  },
263
  decoration: InputDecoration(
264
- hintText: 'Chat with this tikslopr..',
265
  hintStyle: const TextStyle(color: TikSlopColors.onSurfaceVariant, fontSize: 16),
266
  border: OutlineInputBorder(
267
  borderRadius: BorderRadius.circular(12),
@@ -417,7 +418,7 @@ class _ChatWidgetState extends State<ChatWidget> {
417
  Icon(Icons.chat, color: TikSlopColors.onBackground),
418
  SizedBox(width: 8),
419
  Text(
420
- 'Simulation log',
421
  style: TextStyle(
422
  color: TikSlopColors.onBackground,
423
  fontSize: 16,
 
251
  style: const TextStyle(color: TikSlopColors.onSurface),
252
  maxLength: 255,
253
  maxLines: 1,
254
+ onSubmitted: (_) => _sendMessage(),
255
  onChanged: (value) {
256
  // Enforce the character limit by trimming excess characters
257
  if (value.length > 255) {
 
262
  }
263
  },
264
  decoration: InputDecoration(
265
+ hintText: 'Ask the game master!',
266
  hintStyle: const TextStyle(color: TikSlopColors.onSurfaceVariant, fontSize: 16),
267
  border: OutlineInputBorder(
268
  borderRadius: BorderRadius.circular(12),
 
418
  Icon(Icons.chat, color: TikSlopColors.onBackground),
419
  SizedBox(width: 8),
420
  Text(
421
+ 'World simulator log',
422
  style: TextStyle(
423
  color: TikSlopColors.onBackground,
424
  fontSize: 16,
server/README.md ADDED
@@ -0,0 +1,14 @@
1
+
2
+ server/
3
+ ├── __init__.py
4
+ ├── api_config.py # Configuration constants
5
+ ├── api_core.py # Main API class (now much cleaner!)
6
+ ├── api_metrics.py # Metrics functionality
7
+ ├── api_session.py # Session management
8
+ ├── chat.py # Chat room management
9
+ ├── config_utils.py # Configuration utilities
10
+ ├── endpoint_manager.py # Endpoint management with error handling
11
+ ├── llm_utils.py # LLM client and text generation
12
+ ├── models.py # Data models and types
13
+ ├── utils.py # Generic utilities (YAML parsing, etc.)
14
+ └── video_utils.py # Video generation (HF endpoints + Gradio)
server/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Server package initialization
2
+ """
3
+ This package contains the server-side components for the TikSlop API.
4
+ """
api_config.py β†’ server/api_config.py RENAMED
File without changes
api_core.py β†’ server/api_core.py RENAMED
@@ -4,396 +4,56 @@ import io
4
  import re
5
  import base64
6
  import uuid
7
- from typing import Dict, Any, Optional, List, Literal
8
- from dataclasses import dataclass
9
- from asyncio import Lock, Queue
10
  import asyncio
11
  import time
12
  import datetime
13
- from contextlib import asynccontextmanager
14
  from collections import defaultdict
15
  from aiohttp import web, ClientSession
16
- from huggingface_hub import InferenceClient, HfApi
17
  from gradio_client import Client
18
  import random
19
  import yaml
20
  import json
21
 
22
- from api_config import *
23
-
24
- # User role type
25
- UserRole = Literal['anon', 'normal', 'pro', 'admin']
26
-
27
- # Configure logging
28
- logging.basicConfig(
29
- level=logging.INFO,
30
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31
  )
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- def generate_seed():
36
- """Generate a random positive 32-bit integer seed."""
37
- return random.randint(0, 2**32 - 1)
38
-
39
- def sanitize_yaml_response(response_text: str) -> str:
40
- """
41
- Sanitize and format AI response into valid YAML.
42
- Returns properly formatted YAML string.
43
- """
44
-
45
- response_text = response_text.split("```")[0]
46
-
47
- # Remove any markdown code block indicators and YAML document markers
48
- clean_text = re.sub(r'```yaml|```|---|\.\.\.$', '', response_text.strip())
49
-
50
- # Split into lines and process each line
51
- lines = clean_text.split('\n')
52
- sanitized_lines = []
53
- current_field = None
54
-
55
- for line in lines:
56
- stripped = line.strip()
57
- if not stripped:
58
- continue
59
-
60
- # Handle field starts
61
- if stripped.startswith('title:') or stripped.startswith('description:'):
62
- # Ensure proper YAML format with space after colon and proper quoting
63
- field_name = stripped.split(':', 1)[0]
64
- field_value = stripped.split(':', 1)[1].strip().strip('"\'')
65
-
66
- # Quote the value if it contains special characters
67
- if any(c in field_value for c in ':[]{},&*#?|-<>=!%@`'):
68
- field_value = f'"{field_value}"'
69
-
70
- sanitized_lines.append(f"{field_name}: {field_value}")
71
- current_field = field_name
72
-
73
- elif stripped.startswith('tags:'):
74
- sanitized_lines.append('tags:')
75
- current_field = 'tags'
76
-
77
- elif stripped.startswith('-') and current_field == 'tags':
78
- # Process tag values
79
- tag = stripped[1:].strip().strip('"\'')
80
- if tag:
81
- # Clean and format tag
82
- tag = re.sub(r'[^\x00-\x7F]+', '', tag) # Remove non-ASCII
83
- tag = re.sub(r'[^a-zA-Z0-9\s-]', '', tag) # Keep only alphanumeric and hyphen
84
- tag = tag.strip().lower().replace(' ', '-')
85
- if tag:
86
- sanitized_lines.append(f" - {tag}")
87
-
88
- elif current_field in ['title', 'description']:
89
- # Handle multi-line title/description continuation
90
- value = stripped.strip('"\'')
91
- if value:
92
- # Append to previous line
93
- prev = sanitized_lines[-1]
94
- sanitized_lines[-1] = f"{prev} {value}"
95
-
96
- # Ensure the YAML has all required fields
97
- required_fields = {'title', 'description', 'tags'}
98
- found_fields = {line.split(':')[0].strip() for line in sanitized_lines if ':' in line}
99
-
100
- for field in required_fields - found_fields:
101
- if field == 'tags':
102
- sanitized_lines.extend(['tags:', ' - default'])
103
- else:
104
- sanitized_lines.append(f'{field}: "No {field} provided"')
105
-
106
- return '\n'.join(sanitized_lines)
107
-
108
- @dataclass
109
- class Endpoint:
110
- id: int
111
- url: str
112
- busy: bool = False
113
- last_used: float = 0
114
- error_count: int = 0
115
- error_until: float = 0 # Timestamp until which this endpoint is considered in error state
116
-
117
- class EndpointManager:
118
- def __init__(self):
119
- self.endpoints: List[Endpoint] = []
120
- self.lock = Lock()
121
- self.initialize_endpoints()
122
- self.last_used_index = -1 # Track the last used endpoint for round-robin
123
-
124
- def initialize_endpoints(self):
125
- """Initialize the list of endpoints"""
126
- for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
127
- endpoint = Endpoint(id=i + 1, url=url)
128
- self.endpoints.append(endpoint)
129
-
130
- def _get_next_free_endpoint(self):
131
- """Get the next available non-busy endpoint, or oldest endpoint if all are busy"""
132
- current_time = time.time()
133
-
134
- # First priority: Get any non-busy and non-error endpoint
135
- free_endpoints = [
136
- ep for ep in self.endpoints
137
- if not ep.busy and current_time > ep.error_until
138
- ]
139
-
140
- if free_endpoints:
141
- # Return the least recently used free endpoint
142
- return min(free_endpoints, key=lambda ep: ep.last_used)
143
-
144
- # Second priority: If all busy/error, use round-robin but skip error endpoints
145
- tried_count = 0
146
- next_index = self.last_used_index
147
-
148
- while tried_count < len(self.endpoints):
149
- next_index = (next_index + 1) % len(self.endpoints)
150
- tried_count += 1
151
-
152
- # If endpoint is not in error state, use it
153
- if current_time > self.endpoints[next_index].error_until:
154
- self.last_used_index = next_index
155
- return self.endpoints[next_index]
156
-
157
- # If all endpoints are in error state, use the one with earliest error expiry
158
- self.last_used_index = next_index
159
- return min(self.endpoints, key=lambda ep: ep.error_until)
160
-
161
- @asynccontextmanager
162
- async def get_endpoint(self, max_wait_time: int = 10):
163
- """Get the next available endpoint using a context manager"""
164
- start_time = time.time()
165
- endpoint = None
166
-
167
- try:
168
- while True:
169
- if time.time() - start_time > max_wait_time:
170
- raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
171
-
172
- async with self.lock:
173
- # Get the next available endpoint using our selection strategy
174
- endpoint = self._get_next_free_endpoint()
175
-
176
- # Mark it as busy
177
- endpoint.busy = True
178
- endpoint.last_used = time.time()
179
- #logger.info(f"Using endpoint {endpoint.id} (busy: {endpoint.busy}, last used: {endpoint.last_used})")
180
- break
181
-
182
- yield endpoint
183
-
184
- finally:
185
- if endpoint:
186
- async with self.lock:
187
- endpoint.busy = False
188
- endpoint.last_used = time.time()
189
- # We don't need to put back into queue - our strategy now picks directly from the list
190
 
191
- class ChatRoom:
192
- def __init__(self):
193
- self.messages = []
194
- self.connected_clients = set()
195
- self.max_history = 100
196
 
197
- def add_message(self, message):
198
- self.messages.append(message)
199
- if len(self.messages) > self.max_history:
200
- self.messages.pop(0)
201
 
202
- def get_recent_messages(self, limit=50):
203
- return self.messages[-limit:]
204
 
205
  class VideoGenerationAPI:
206
  def __init__(self):
207
  self.hf_api = HfApi(token=HF_TOKEN)
208
  self.endpoint_manager = EndpointManager()
209
  self.active_requests: Dict[str, asyncio.Future] = {}
210
- self.chat_rooms = defaultdict(ChatRoom)
211
  self.video_events: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
212
  self.event_history_limit = 50
213
  # Cache for user roles to avoid repeated API calls
214
  self.user_role_cache: Dict[str, Dict[str, Any]] = {}
215
  # Cache expiration time (10 minutes)
216
  self.cache_expiration = 600
217
-
218
- def _get_inference_client(self, llm_config: Optional[dict] = None) -> InferenceClient:
219
- """
220
- Get an InferenceClient configured with the provided LLM settings.
221
-
222
- Priority order for API keys:
223
- 1. Provider-specific API key (if provided)
224
- 2. User's HF token (if provided)
225
- 3. Server's HF token (only for built-in provider)
226
- 4. Raise exception if no valid key is available
227
- """
228
-
229
- if not llm_config:
230
- if HF_TOKEN:
231
- return InferenceClient(
232
- model=TEXT_MODEL,
233
- token=HF_TOKEN
234
- )
235
- else:
236
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
237
-
238
- provider = llm_config.get('provider', '').lower()
239
- logger.info(f"provider = {provider}")
240
-
241
- # If no provider or model specified, use default
242
- if not provider or provider == 'built-in':
243
- if HF_TOKEN:
244
- return InferenceClient(
245
- model=TEXT_MODEL,
246
- token=HF_TOKEN
247
- )
248
- else:
249
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
250
-
251
- model = llm_config.get('model', '')
252
- user_provider_api_key = llm_config.get('api_key', '') # Provider-specific API key
253
- user_hf_token = llm_config.get('hf_token', '') # User's HF token
254
-
255
- #logger.info(f"model = {model}")
256
- #logger.info(f"user_provider_api_key = {user_provider_api_key}")
257
- #logger.info(f"user_hf_token = {user_hf_token}")
258
-
259
- # If no provider or model specified, use default
260
- if not provider or provider == 'built-in':
261
- if HF_TOKEN:
262
- return InferenceClient(
263
- model=TEXT_MODEL,
264
- token=HF_TOKEN
265
- )
266
- else:
267
- raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
268
-
269
- try:
270
- # Case 1: Use a provider with a provider-specific API key if available
271
- # This mode is currently hidden in the Flutter UI (we don't ask for provider-specific keys yet)
272
- # but it is implemented here so that we don't forget it later
273
- if user_provider_api_key:
274
- return InferenceClient(
275
- provider=hf_provider,
276
- model=model,
277
- api_key=user_provider_api_key
278
- )
279
-
280
- # Case 2: Use a provider with user's HF token if available
281
- elif user_hf_token:
282
- return InferenceClient(
283
- provider=hf_provider,
284
- model=model,
285
- token=user_hf_token
286
- )
287
- #
288
- else:
289
- raise ValueError(f"No API key provided for provider '{provider}'. Please provide either a valid {provider} API key or your Hugging Face API key.")
290
-
291
- except ValueError:
292
- # Re-raise ValueError for missing API keys
293
- raise
294
- except Exception as e:
295
- logger.error(f"Error creating InferenceClient for provider '{provider}' and model '{model}': {e}")
296
- # Re-raise all other exceptions
297
- raise
298
-
299
- async def _generate_text(self, prompt: str, llm_config: Optional[dict] = None,
300
- max_new_tokens: int = 200, temperature: float = 0.7,
301
- model_override: Optional[str] = None) -> str:
302
- """
303
- Helper method to generate text using the appropriate client and configuration.
304
- Tries chat_completion first (modern standard), falls back to text_generation.
305
-
306
- Args:
307
- prompt: The prompt to generate text from
308
- llm_config: Optional LLM configuration dict
309
- max_new_tokens: Maximum number of new tokens to generate
310
- temperature: Temperature for generation
311
- model_override: Optional model to use instead of the one in llm_config
312
-
313
- Returns:
314
- Generated text string
315
- """
316
- # Get the appropriate client
317
- client = self._get_inference_client(llm_config)
318
-
319
- # Determine the model to use
320
- if model_override:
321
- model_to_use = model_override
322
- elif llm_config:
323
- model_to_use = llm_config.get('model', TEXT_MODEL)
324
- else:
325
- model_to_use = TEXT_MODEL
326
-
327
- # Try chat_completion first (modern standard, more widely supported)
328
- try:
329
- messages = [{"role": "user", "content": prompt}]
330
-
331
- if llm_config and llm_config.get('provider') != 'huggingface':
332
- # For third-party providers
333
- completion = await asyncio.get_event_loop().run_in_executor(
334
- None,
335
- lambda: client.chat.completions.create(
336
- messages=messages,
337
- max_tokens=max_new_tokens,
338
- temperature=temperature
339
- )
340
- )
341
- else:
342
- # For HuggingFace models, specify the model
343
- completion = await asyncio.get_event_loop().run_in_executor(
344
- None,
345
- lambda: client.chat.completions.create(
346
- model=model_to_use,
347
- messages=messages,
348
- max_tokens=max_new_tokens,
349
- temperature=temperature
350
- )
351
- )
352
-
353
- # Extract the generated text from the chat completion response
354
- return completion.choices[0].message.content
355
-
356
- except Exception as e:
357
- error_message = str(e).lower()
358
- # Check if the error is related to task compatibility or API not supported
359
- if ("not supported for task" in error_message or
360
- "conversational" in error_message or
361
- "chat" in error_message):
362
- logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
363
-
364
- # Fall back to text_generation API
365
- try:
366
- if llm_config and llm_config.get('provider') != 'huggingface':
367
- # For third-party providers
368
- response = await asyncio.get_event_loop().run_in_executor(
369
- None,
370
- lambda: client.text_generation(
371
- prompt,
372
- max_new_tokens=max_new_tokens,
373
- temperature=temperature
374
- )
375
- )
376
- else:
377
- # For HuggingFace models, specify the model
378
- response = await asyncio.get_event_loop().run_in_executor(
379
- None,
380
- lambda: client.text_generation(
381
- prompt,
382
- model=model_to_use,
383
- max_new_tokens=max_new_tokens,
384
- temperature=temperature
385
- )
386
- )
387
- return response
388
-
389
- except Exception as text_error:
390
- logger.error(f"Both chat_completion and text_generation failed: {text_error}")
391
- raise text_error
392
- else:
393
- # Re-raise the original error if it's not a task compatibility issue
394
- logger.error(f"chat_completion failed with non-compatibility error: {e}")
395
- raise e
396
-
397
 
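The removed helper above boils down to a "try chat_completion, fall back to text_generation" strategy. Here is a minimal synchronous sketch of that strategy, assuming a valid model id and token; the committed version additionally runs the calls in an executor and supports third-party providers:

from huggingface_hub import InferenceClient

def generate(prompt: str, model: str, token: str,
             max_new_tokens: int = 200, temperature: float = 0.7) -> str:
    client = InferenceClient(model=model, token=token)
    try:
        # Preferred path: the chat-completion interface most deployments expose.
        out = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_new_tokens,
            temperature=temperature,
        )
        return out.choices[0].message.content
    except Exception as exc:
        # Text-generation-only backends reject chat requests; retry with the raw task.
        msg = str(exc).lower()
        if "not supported for task" in msg or "conversational" in msg or "chat" in msg:
            return client.text_generation(
                prompt, max_new_tokens=max_new_tokens, temperature=temperature
            )
        raise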
398
  def _add_event(self, video_id: str, event: Dict[str, Any]):
399
  """Add an event to the video's history and maintain the size limit"""
@@ -490,32 +150,13 @@ class VideoGenerationAPI:
490
  temperature = random.uniform(0.68, 0.72)
491
 
492
  while current_attempt <= max_attempts:
493
- prompt = f"""# Instruction
494
- Your response MUST be a YAML object containing a title and description, consistent with what we can find on a video sharing platform.
495
- Format your YAML response with only those fields: "title" (a short string) and "description" (string caption of the scene). Do not add any other field.
496
- In the description field, describe in a very synthetic way the visuals of the first shot (first scene), eg "<STYLE>, medium close-up shot, high angle view. In the foreground a <OPTIONAL AGE> <OPTIONAL GENDER> <CHARACTERS> <ACTIONS>. In the background <DESCRIBE LOCATION, BACKGROUND CHARACTERS, OBJECTS ETC>. The scene is lit by <LIGHTING> <WEATHER>". This is just an example! you MUST replace the <TAGS>!!.
497
- Don't forget to replace <STYLE> etc, by the actual fields!!
498
- For the style, be creative, for instance you can use anything like a "documentary footage", "japanese animation", "movie scene", "tv series", "tv show", "security footage" etc.
499
- If the user asks for something specific eg "movie screencap", "movie scene", "documentary footage" "animation" as a style etc.
500
- Keep it minimalist but still descriptive, don't use bullets points, use simple words, go to the essential to describe style (cinematic, documentary footage, 3D rendering..), camera modes and angles, characters, age, gender, action, location, lighting, country, costume, time, weather, textures, color palette.. etc). Write about 80 words, and use between 2 and 3 sentences.
501
- The most important part is to describe the actions and movements in the scene, so don't forget that!
502
- Don't describe sound, so never say things like "atmospheric music playing in the background".
503
- Instead describe the visual elements we can see in the background, be precise, (if there are anything, cars, objects, people, bricks, birds, clouds, trees, leaves or grass then say it so etc).
504
- Make the result unique and different from previous search results. ONLY RETURN YAML AND WITH ENGLISH CONTENT, NOT CHINESE - DO NOT ADD ANY OTHER COMMENT!
505
-
506
- # Context
507
- This is attempt {current_attempt}.
508
-
509
- # Input
510
- Describe the first scene/shot for: "{query}".
511
-
512
- # Output
513
-
514
- ```yaml
515
- title: \""""
516
 
517
  try:
518
- raw_yaml_str = await self._generate_text(
519
  prompt,
520
  llm_config=llm_config,
521
  max_new_tokens=200,
@@ -526,31 +167,7 @@ title: \""""
526
 
527
  #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
528
 
529
- if raw_yaml_str.startswith("```yaml"):
530
- # Remove the "```yaml" at the beginning and closing ```
531
- raw_yaml_str = raw_yaml_str[7:] # Remove "```yaml" (7 characters)
532
- if raw_yaml_str.endswith("```"):
533
- raw_yaml_str = raw_yaml_str[:-3] # Remove closing ```
534
- raw_yaml_str = raw_yaml_str.strip()
535
- elif raw_yaml_str.startswith("```"):
536
- # Remove the "```" at the beginning and closing ```
537
- raw_yaml_str = raw_yaml_str[3:] # Remove opening ```
538
- if raw_yaml_str.endswith("```"):
539
- raw_yaml_str = raw_yaml_str[:-3] # Remove closing ```
540
- raw_yaml_str = raw_yaml_str.strip()
541
- else:
542
- raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', f"title: \"{raw_yaml_str}")
543
-
544
- # Check if it already has a proper YAML structure
545
- if not raw_yaml_str.startswith(('title:', 'title :')):
546
- # Only wrap with title if it doesn't already have one
547
- # Also escape any quotes in the string to prevent YAML parsing issues
548
- escaped_yaml = raw_yaml_str.replace('"', '\\"')
549
- raw_yaml_str = f'title: "{escaped_yaml}"'
550
- else:
551
- # If it already has title:, just clean it up
552
- raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', raw_yaml_str)
553
-
554
  sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
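The fence-stripping that used to live inline here is a small, testable transformation on its own. The snippet below sketches the same clean-up, assuming the model may wrap its answer in ``` or ```yaml fences; per the comment above, the committed code now performs this inside sanitize_yaml_response, and the helper name here is illustrative only:

import yaml

def parse_llm_yaml(raw: str) -> dict:
    text = raw.strip()
    for fence in ("```yaml", "```"):
        if text.startswith(fence):
            text = text[len(fence):]
            break
    if text.endswith("```"):
        text = text[:-3]
    return yaml.safe_load(text.strip())

print(parse_llm_yaml('```yaml\ntitle: "A quiet park"\ndescription: "Documentary footage..."\n```'))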
555
 
556
  try:
@@ -631,13 +248,12 @@ title: \""""
631
  async def generate_caption(self, title: str, description: str, llm_config: Optional[dict] = None) -> str:
632
  """Generate detailed caption using HF text generation"""
633
  try:
634
- prompt = f"""Generate a detailed story for a video named: "{title}"
635
- Visual description of the video: {description}.
636
- Instructions: Write the story summary, including the plot, action, what should happen.
637
- Make it around 200-300 words long.
638
- A video can be anything from a tutorial, webcam, trailer, movie, live stream etc."""
639
 
640
- response = await self._generate_text(
641
  prompt,
642
  llm_config=llm_config,
643
  max_new_tokens=180,
@@ -684,67 +300,32 @@ A video can be anything from a tutorial, webcam, trailer, movie, live stream etc
684
  # Create an appropriate prompt based on whether this is the first simulation
685
  chat_section = ""
686
  if chat_messages:
 
687
  chat_section = f"""
688
  People are watching this content right now and have shared their thoughts. Like a game master, please take their feedback as input to adjust the story and/or the scene. Here are their messages:
689
 
690
  {chat_messages}
691
  """
 
 
692
 
693
  if is_first_simulation:
694
- prompt = f"""You are tasked with evolving the narrative for a video titled: "{original_title}"
695
-
696
- Original description:
697
- {original_description}
698
- {chat_section}
699
-
700
- Instructions:
701
- 1. Imagine the next logical scene or development that would follow the current description.
702
- 2. Consider the video context and recent events
703
- 3. Create a natural progression from previous clips
704
- 4. Take into account user suggestions (chat messages) into the scene
705
- 5. IMPORTANT: viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
706
- 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
707
- 7. Return ONLY the caption text, no additional formatting or explanation
708
- 8. Write in English, about 200 words.
709
- 9. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc..from the previous description, when it makes sense).
710
- 10. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
711
- 11. Please write in the same style as the original description, by keeping things brief etc.
712
-
713
- Remember to obey to what users said in the chat history!!
714
-
715
- Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
716
  else:
717
- prompt = f"""You are tasked with continuing to evolve the narrative for a video titled: "{original_title}"
718
-
719
- Original description:
720
- {original_description}
721
-
722
- Condensed history of scenes so far:
723
- {condensed_history}
724
-
725
- Current description (most recent scene):
726
- {current_description}
727
- {chat_section}
728
-
729
- Instructions:
730
- 1. Imagine the next logical scene or development that would follow the current description.
731
- 2. Consider the video context and recent events
732
- 3. Create a natural progression from previous clips
733
- 4. Take into account user suggestions (chat messages) into the scene
734
- 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
735
- 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
736
- 7. Return ONLY the caption text, no additional formatting or explanation
737
- 8. Write in English, about 200 words.
738
- 9. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc..from the previous description, when it makes sense).
739
- 10. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
740
- 11. Please write in the same style as the original description, by keeping things brief etc.
741
-
742
- Remember to obey to what users said in the chat history!!
743
-
744
- Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
745
 
746
  # Generate the evolved description using the helper method
747
- response = await self._generate_text(
748
  prompt,
749
  llm_config=llm_config,
750
  max_new_tokens=240,
@@ -774,109 +355,25 @@ Now, you must write down the new scene description (don't write a long story! wr
774
  "condensed_history": condensed_history
775
  }
776
 
777
-
778
- def get_config_value(self, role: UserRole, field: str, options: dict = None) -> Any:
779
- """
780
- Get the appropriate config value for a user role.
781
-
782
- Args:
783
- role: The user role ('anon', 'normal', 'pro', 'admin')
784
- field: The config field name to retrieve
785
- options: Optional user-provided options that may override defaults
786
-
787
- Returns:
788
- The config value appropriate for the user's role with respect to
789
- min/max boundaries and user overrides.
790
- """
791
- # Select the appropriate config based on user role
792
- if role == 'admin':
793
- config = CONFIG_FOR_ADMIN_HF_USERS
794
- elif role == 'pro':
795
- config = CONFIG_FOR_PRO_HF_USERS
796
- elif role == 'normal':
797
- config = CONFIG_FOR_STANDARD_HF_USERS
798
- else: # Anonymous users
799
- config = CONFIG_FOR_ANONYMOUS_USERS
800
-
801
- # Get the default value for this field from the config
802
- default_value = config.get(f"default_{field}", None)
803
-
804
- # For fields that have min/max bounds
805
- min_field = f"min_{field}"
806
- max_field = f"max_{field}"
807
-
808
- # Check if min/max constraints exist for this field
809
- has_constraints = min_field in config or max_field in config
810
-
811
- if not has_constraints:
812
- # For fields without constraints, just return the value from config
813
- return default_value
814
-
815
- # Get min and max values from config (if they exist)
816
- min_value = config.get(min_field, None)
817
- max_value = config.get(max_field, None)
818
-
819
- # If user provided options with this field
820
- if options and field in options:
821
- user_value = options[field]
822
-
823
- # Apply constraints if they exist
824
- if min_value is not None and user_value < min_value:
825
- return min_value
826
- if max_value is not None and user_value > max_value:
827
- return max_value
828
-
829
- # If within bounds, use the user's value
830
- return user_value
831
-
832
- # If no user value, return the default
833
- return default_value
834
-
835
  async def _generate_clip_prompt(self, video_id: str, title: str, description: str) -> str:
836
  """Generate a new prompt for the next clip based on event history"""
837
  events = self.video_events.get(video_id, [])
838
  events_json = "\n".join(json.dumps(event) for event in events)
839
 
840
- prompt = f"""# Context and task
841
- Please write the caption for a new clip.
842
-
843
- # Instructions
844
- 1. Consider the video context and recent events
845
- 2. Create a natural progression from previous clips
846
- 3. Take into account user suggestions (chat messages) into the scene
847
- 4. Don't generate hateful, political, violent or sexual content
848
- 5. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
849
- 6. Return ONLY the caption text, no additional formatting or explanation
850
- 7. Write in English, about 200 words.
851
- 8. Keep the visual style consistant, but content as well (repeat the style, character, locations, appearance etc.. across scenes, when it makes sense).
852
- 8. Your caption must describe visual elements of the scene in details, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location visual characteristics and geometry, lighting, action, objects, weather, textures, lighting.
853
-
854
- # Examples
855
- Here is a demo scenario, with fake data:
856
- {{"time": "2024-11-29T13:36:15Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
857
- {{"time": "2024-11-29T13:36:20Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "hi"}}
858
- {{"time": "2024-11-29T13:36:25Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "more squirrels plz"}}
859
- {{"time": "2024-11-29T13:36:26Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, a lot of squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
860
-
861
- # Real scenario and data
862
-
863
- We are inside a video titled "{title}"
864
- The video is described by: "{description}".
865
- Here is a summary of the {len(events)} most recent events:
866
- {events_json}
867
-
868
- # Your response
869
- Your caption:"""
870
 
871
  try:
872
- response = await asyncio.get_event_loop().run_in_executor(
873
- None,
874
- lambda: self.inference_client.text_generation(
875
- prompt,
876
- model=TEXT_MODEL,
877
- max_new_tokens=200,
878
- temperature=0.7
879
- )
880
  )
881
 
882
  # Clean up the response
@@ -952,7 +449,8 @@ Your caption:"""
952
 
953
  start_time = time.time()
954
  # Rest of thumbnail generation logic same as regular video but with optimized settings
955
- result = await self._generate_video_content_with_inference_endpoints(
 
956
  prompt=prompt,
957
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
958
  width=width,
@@ -1000,11 +498,11 @@ Your caption:"""
1000
  prompt = f"{clip_caption}, {POSITIVE_PROMPT_SUFFIX}"
1001
 
1002
  # Get the config values based on user role
1003
- width = self.get_config_value(user_role, 'clip_width', options)
1004
- height = self.get_config_value(user_role, 'clip_height', options)
1005
- num_frames = self.get_config_value(user_role, 'num_frames', options)
1006
- num_inference_steps = self.get_config_value(user_role, 'num_inference_steps', options)
1007
- frame_rate = self.get_config_value(user_role, 'clip_framerate', options)
1008
 
1009
  # Get orientation from options
1010
  orientation = options.get('orientation', 'LANDSCAPE')
@@ -1025,7 +523,8 @@ Your caption:"""
1025
  # Generate the video with standard settings
1026
  # historically we used _generate_video_content_with_inference_endpoints,
1027
  # which offers better performance and reliability, but costs were spinning out of control
1028
- return await self._generate_video_content_with_inference_endpoints(
 
1029
  prompt=prompt,
1030
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
1031
  width=width,
@@ -1037,267 +536,26 @@ Your caption:"""
1037
  options=options,
1038
  user_role=user_role
1039
  )
1040
-
1041
- async def _generate_video_content_with_inference_endpoints(self, prompt: str, negative_prompt: str, width: int,
1042
- height: int, num_frames: int, num_inference_steps: int,
1043
- frame_rate: int, seed: int, options: dict, user_role: UserRole) -> str:
1044
- """
1045
- Internal method to generate video content with specific parameters.
1046
- Used by both regular video generation and thumbnail generation.
1047
- """
1048
- is_thumbnail = options.get('thumbnail', False)
1049
- request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
1050
- video_id = options.get('video_id', 'unknown')
1051
-
1052
- # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
1053
-
1054
- json_payload = {
1055
- "inputs": {
1056
- "prompt": prompt,
1057
- },
1058
- "parameters": {
1059
- # ------------------- settings for LTX-Video -----------------------
1060
- "negative_prompt": negative_prompt,
1061
- "width": width,
1062
- "height": height,
1063
- "num_frames": num_frames,
1064
- "num_inference_steps": num_inference_steps,
1065
- "guidance_scale": options.get('guidance_scale', GUIDANCE_SCALE),
1066
- "seed": seed,
1067
-
1068
- # ------------------- settings for Varnish -----------------------
1069
- "double_num_frames": False, # <- False for real-time generation
1070
- "fps": frame_rate,
1071
- "super_resolution": False, # <- False for real-time generation
1072
- "grain_amount": 0, # No film grain (on low-res, low-quality generation the effects aren't worth it + it adds weight to the MP4 payload)
1073
- }
1074
- }
1075
-
1076
- # Add thumbnail flag to help with metrics and debugging
1077
- if is_thumbnail:
1078
- json_payload["metadata"] = {
1079
- "is_thumbnail": True,
1080
- "thumbnail_version": "1.0",
1081
- "request_id": request_id
1082
- }
1083
-
1084
- # logger.info(f"[{request_id}] Waiting for an available endpoint...")
1085
- async with self.endpoint_manager.get_endpoint() as endpoint:
1086
- # logger.info(f"[{request_id}] Using endpoint {endpoint.id} for generation")
1087
-
1088
- try:
1089
- async with ClientSession() as session:
1090
- #logger.info(f"[{request_id}] Sending request to endpoint {endpoint.id}: {endpoint.url}")
1091
- start_time = time.time()
1092
-
1093
- # Proceed with actual request
1094
- async with session.post(
1095
- endpoint.url,
1096
- headers={
1097
- "Accept": "application/json",
1098
- "Authorization": f"Bearer {HF_TOKEN}",
1099
- "Content-Type": "application/json",
1100
- "X-Request-ID": request_id # Add request ID to headers
1101
- },
1102
- json=json_payload,
1103
- timeout=12 # Extended timeout for thumbnails (was 8s)
1104
- ) as response:
1105
- request_duration = time.time() - start_time
1106
- #logger.info(f"[{request_id}] Received response from endpoint {endpoint.id} in {request_duration:.2f}s: HTTP {response.status}")
1107
-
1108
- if response.status != 200:
1109
- error_text = await response.text()
1110
- logger.error(f"[{request_id}] Failed response: {error_text}")
1111
- # Mark endpoint as in error state
1112
- await self._mark_endpoint_error(endpoint)
1113
- if "paused" in error_text:
1114
- logger.error(f"[{request_id}] Endpoint is paused")
1115
- return ""
1116
- raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
1117
-
1118
- result = await response.json()
1119
- #logger.info(f"[{request_id}] Successfully parsed JSON response")
1120
-
1121
- if "error" in result:
1122
- error_msg = result['error']
1123
- logger.error(f"[{request_id}] Error in response: {error_msg}")
1124
- # Mark endpoint as in error state
1125
- await self._mark_endpoint_error(endpoint)
1126
- if "paused" in str(error_msg).lower():
1127
- logger.error(f"[{request_id}] Endpoint is paused")
1128
- return ""
1129
- raise Exception(f"Video generation failed: {error_msg}")
1130
-
1131
- video_data_uri = result.get("video")
1132
- if not video_data_uri:
1133
- logger.error(f"[{request_id}] No video data in response")
1134
- # Mark endpoint as in error state
1135
- await self._mark_endpoint_error(endpoint)
1136
- raise Exception("No video data in response")
1137
-
1138
- # Get data size
1139
- data_size = len(video_data_uri)
1140
- #logger.info(f"[{request_id}] Received video data: {data_size} chars")
1141
-
1142
- # Reset error count on successful call
1143
- endpoint.error_count = 0
1144
- endpoint.error_until = 0
1145
-
1146
- return video_data_uri
1147
-
1148
- except asyncio.TimeoutError:
1149
- # Handle timeout specifically
1150
- logger.error(f"[{request_id}] Timeout occurred after {time.time() - start_time:.2f}s")
1151
- await self._mark_endpoint_error(endpoint, is_timeout=True)
1152
- return ""
1153
- except Exception as e:
1154
- # Handle all other exceptions
1155
- logger.error(f"[{request_id}] Exception during video generation: {str(e)}")
1156
- if not isinstance(e, asyncio.TimeoutError): # Already handled above
1157
- await self._mark_endpoint_error(endpoint)
1158
- return ""
1159
-
1160
- async def _generate_video_content_with_gradio(self, prompt: str, negative_prompt: str, width: int,
1161
- height: int, num_frames: int, num_inference_steps: int,
1162
- frame_rate: int, seed: int, options: dict, user_role: UserRole) -> str:
1163
- """
1164
- Internal method to generate video content with specific parameters.
1165
- Used by both regular video generation and thumbnail generation.
1166
- This version use our generic gradio space.
1167
- """
1168
- is_thumbnail = options.get('thumbnail', False)
1169
- request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
1170
- video_id = options.get('video_id', 'unknown')
1171
-
1172
- # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
1173
-
1174
- # Define the synchronous function
1175
- def _sync_gradio_call():
1176
- client = Client("jbilcke-hf/fast-rendering-node", hf_token=HF_TOKEN)
1177
-
1178
- return client.predict(
1179
- prompt=prompt,
1180
- seed=seed,
1181
- fps=8, # frame_rate, # attention, right now tikslop asks for 25 FPS
1182
- width=640, # width, # attention, right now tikslop asks for 1152
1183
- height=320, # height, # attention, right now tikslop asks for 640
1184
- duration=3, # num_frames // frame_rate
1185
- )
1186
-
1187
- # Run in a thread using asyncio.to_thread (Python 3.9+)
1188
- video_data_uri = await asyncio.to_thread(_sync_gradio_call)
1189
-
1190
- return video_data_uri
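The Gradio path above relies on pushing a blocking SDK call off the event loop. A minimal, runnable sketch of that pattern, with a placeholder standing in for the real gradio_client call:

import asyncio
import time

def blocking_render(prompt: str) -> str:
    # stand-in for the real gradio_client.Client(...).predict(...) call
    time.sleep(0.5)
    return f"data:video/mp4;base64,...  # rendered for: {prompt}"

async def main() -> None:
    # asyncio.to_thread (Python 3.9+) runs the blocking call in a worker thread,
    # so other websocket sessions keep being served while the clip renders.
    uri = await asyncio.to_thread(blocking_render, "a park at dawn, documentary footage")
    print(uri)

asyncio.run(main())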
1191
-
1192
- async def _mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
1193
- """Mark an endpoint as being in error state with exponential backoff"""
1194
- async with self.endpoint_manager.lock:
1195
- endpoint.error_count += 1
1196
-
1197
- # Calculate backoff time exponentially based on error count
1198
- # Start with 15 seconds, then 30, 60, etc. up to a max of 5 minutes
1199
- # Using shorter backoffs since generation should be fast
1200
- backoff_seconds = min(15 * (2 ** (endpoint.error_count - 1)), 300)
1201
-
1202
- # Add extra backoff for timeouts which are more indicative of serious issues
1203
- if is_timeout:
1204
- backoff_seconds *= 2
1205
-
1206
- endpoint.error_until = time.time() + backoff_seconds
1207
-
1208
- logger.warning(
1209
- f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
1210
- f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
1211
- )
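For reference, the backoff schedule implemented above works out as follows; a worked example using the same constants (15 s base, doubling per consecutive error, capped at 300 s, doubled once more for timeouts):

def backoff_seconds(error_count: int, is_timeout: bool = False) -> int:
    seconds = min(15 * (2 ** (error_count - 1)), 300)
    return seconds * 2 if is_timeout else seconds

print([backoff_seconds(n) for n in range(1, 7)])  # [15, 30, 60, 120, 240, 300]
print(backoff_seconds(3, is_timeout=True))        # 120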
1212
-
1213
 
1214
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
1215
  """Process and broadcast a chat message"""
1216
  video_id = data.get('videoId')
1217
- request_id = data.get('requestId')
1218
 
1219
- if not video_id:
1220
- return {
1221
- 'action': 'chat_message',
1222
- 'requestId': request_id,
1223
- 'success': False,
1224
- 'error': 'No video ID provided'
1225
- }
1226
-
1227
  # Add chat message to event history
1228
- self._add_event(video_id, {
1229
- "time": datetime.datetime.utcnow().isoformat() + "Z",
1230
- "event": "new_chat_message",
1231
- "username": data.get('username', 'Anonymous'),
1232
- "data": data.get('content', '')
1233
- })
1234
-
1235
- room = self.chat_rooms[video_id]
1236
- message_data = {k: v for k, v in data.items() if k != '_ws'}
1237
- room.add_message(message_data)
1238
-
1239
- for client in room.connected_clients:
1240
- if client != ws:
1241
- try:
1242
- await client.send_json({
1243
- 'action': 'chat_message',
1244
- 'broadcast': True,
1245
- **message_data
1246
- })
1247
- except Exception as e:
1248
- logger.error(f"Failed to broadcast to client: {e}")
1249
- room.connected_clients.remove(client)
1250
 
1251
- return {
1252
- 'action': 'chat_message',
1253
- 'requestId': request_id,
1254
- 'success': True,
1255
- 'message': message_data
1256
- }
1257
 
1258
  async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
1259
  """Handle a request to join a chat room"""
1260
- video_id = data.get('videoId')
1261
- request_id = data.get('requestId')
1262
-
1263
- if not video_id:
1264
- return {
1265
- 'action': 'join_chat',
1266
- 'requestId': request_id,
1267
- 'success': False,
1268
- 'error': 'No video ID provided'
1269
- }
1270
-
1271
- room = self.chat_rooms[video_id]
1272
- room.connected_clients.add(ws)
1273
- recent_messages = room.get_recent_messages()
1274
-
1275
- return {
1276
- 'action': 'join_chat',
1277
- 'requestId': request_id,
1278
- 'success': True,
1279
- 'messages': recent_messages
1280
- }
1281
 
1282
  async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
1283
  """Handle a request to leave a chat room"""
1284
- video_id = data.get('videoId')
1285
- request_id = data.get('requestId')
1286
-
1287
- if not video_id:
1288
- return {
1289
- 'action': 'leave_chat',
1290
- 'requestId': request_id,
1291
- 'success': False,
1292
- 'error': 'No video ID provided'
1293
- }
1294
-
1295
- room = self.chat_rooms[video_id]
1296
- if ws in room.connected_clients:
1297
- room.connected_clients.remove(ws)
1298
-
1299
- return {
1300
- 'action': 'leave_chat',
1301
- 'requestId': request_id,
1302
- 'success': True
1303
- }
 
4
  import re
5
  import base64
6
  import uuid
7
+ from typing import Dict, Any, Optional, List
 
 
8
  import asyncio
9
  import time
10
  import datetime
 
11
  from collections import defaultdict
12
  from aiohttp import web, ClientSession
13
+ from huggingface_hub import HfApi
14
  from gradio_client import Client
15
  import random
16
  import yaml
17
  import json
18
 
19
+ from .api_config import *
20
+ from .models import UserRole
21
+ from .endpoint_manager import EndpointManager
22
+ from .utils import generate_seed, sanitize_yaml_response
23
+ from .chat import ChatManager
24
+ from .config_utils import get_config_value
25
+ from .video_utils import (
26
+ generate_video_content_with_inference_endpoints,
27
+ generate_video_content_with_gradio
28
+ )
29
+ from .llm_utils import (
30
+ get_inference_client,
31
+ generate_text,
32
+ SEARCH_VIDEO_PROMPT_TEMPLATE,
33
+ GENERATE_CAPTION_PROMPT_TEMPLATE,
34
+ SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE,
35
+ SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE,
36
+ GENERATE_CLIP_PROMPT_TEMPLATE
37
  )
 
 
 
 
 
 
38
 
39
+ # Configure logging
40
+ from .logging_utils import get_logger
41
+ logger = get_logger(__name__)
 
 
42
 
 
 
 
 
43
 
 
 
44
 
45
  class VideoGenerationAPI:
46
  def __init__(self):
47
  self.hf_api = HfApi(token=HF_TOKEN)
48
  self.endpoint_manager = EndpointManager()
49
  self.active_requests: Dict[str, asyncio.Future] = {}
50
+ self.chat_manager = ChatManager()
51
  self.video_events: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
52
  self.event_history_limit = 50
53
  # Cache for user roles to avoid repeated API calls
54
  self.user_role_cache: Dict[str, Dict[str, Any]] = {}
55
  # Cache expiration time (10 minutes)
56
  self.cache_expiration = 600
 
 
 
 
 
 
57
 
58
  def _add_event(self, video_id: str, event: Dict[str, Any]):
59
  """Add an event to the video's history and maintain the size limit"""
 
150
  temperature = random.uniform(0.68, 0.72)
151
 
152
  while current_attempt <= max_attempts:
153
+ prompt = SEARCH_VIDEO_PROMPT_TEMPLATE.format(
154
+ current_attempt=current_attempt,
155
+ query=query
156
+ )
 
 
 
 
 
 
157
 
158
  try:
159
+ raw_yaml_str = await generate_text(
160
  prompt,
161
  llm_config=llm_config,
162
  max_new_tokens=200,
 
167
 
168
  #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
169
 
170
+ # All pre-processing is now handled in sanitize_yaml_response
 
 
 
 
 
 
171
  sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
172
 
173
  try:
 
248
  async def generate_caption(self, title: str, description: str, llm_config: Optional[dict] = None) -> str:
249
  """Generate detailed caption using HF text generation"""
250
  try:
251
+ prompt = GENERATE_CAPTION_PROMPT_TEMPLATE.format(
252
+ title=title,
253
+ description=description
254
+ )
 
255
 
256
+ response = await generate_text(
257
  prompt,
258
  llm_config=llm_config,
259
  max_new_tokens=180,
 
300
  # Create an appropriate prompt based on whether this is the first simulation
301
  chat_section = ""
302
  if chat_messages:
303
+ logger.info(f"CHAT_DEBUG: Server received chat messages for simulation: {chat_messages}")
304
  chat_section = f"""
305
  People are watching this content right now and have shared their thoughts. Like a game master, please take their feedback as input to adjust the story and/or the scene. Here are their messages:
306
 
307
  {chat_messages}
308
  """
309
+ else:
310
+ logger.info("CHAT_DEBUG: Server simulation called with no chat messages")
311
 
312
  if is_first_simulation:
313
+ prompt = SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE.format(
314
+ original_title=original_title,
315
+ original_description=original_description,
316
+ chat_section=chat_section
317
+ )
 
 
 
 
 
 
318
  else:
319
+ prompt = SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE.format(
320
+ original_title=original_title,
321
+ original_description=original_description,
322
+ condensed_history=condensed_history,
323
+ current_description=current_description,
324
+ chat_section=chat_section
325
+ )
 
 
 
 
 
 
 
326
 
327
  # Generate the evolved description using the helper method
328
+ response = await generate_text(
329
  prompt,
330
  llm_config=llm_config,
331
  max_new_tokens=240,
 
355
  "condensed_history": condensed_history
356
  }
357
 
 
 
 
 
 
358
  async def _generate_clip_prompt(self, video_id: str, title: str, description: str) -> str:
359
  """Generate a new prompt for the next clip based on event history"""
360
  events = self.video_events.get(video_id, [])
361
  events_json = "\n".join(json.dumps(event) for event in events)
362
 
363
+ prompt = GENERATE_CLIP_PROMPT_TEMPLATE.format(
364
+ title=title,
365
+ description=description,
366
+ event_count=len(events),
367
+ events_json=events_json
368
+ )
 
 
 
 
 
 
369
 
370
  try:
371
+ # Use the imported generate_text function instead
372
+ response = await generate_text(
373
+ prompt,
374
+ llm_config=None, # Use default config
375
+ max_new_tokens=200,
376
+ temperature=0.7
 
 
377
  )
378
 
379
  # Clean up the response
 
449
 
450
  start_time = time.time()
451
  # Rest of thumbnail generation logic same as regular video but with optimized settings
452
+ result = await generate_video_content_with_inference_endpoints(
453
+ self.endpoint_manager,
454
  prompt=prompt,
455
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
456
  width=width,
 
498
  prompt = f"{clip_caption}, {POSITIVE_PROMPT_SUFFIX}"
499
 
500
  # Get the config values based on user role
501
+ width = get_config_value(user_role, 'clip_width', options)
502
+ height = get_config_value(user_role, 'clip_height', options)
503
+ num_frames = get_config_value(user_role, 'num_frames', options)
504
+ num_inference_steps = get_config_value(user_role, 'num_inference_steps', options)
505
+ frame_rate = get_config_value(user_role, 'clip_framerate', options)
506
 
507
  # Get orientation from options
508
  orientation = options.get('orientation', 'LANDSCAPE')
 
523
  # Generate the video with standard settings
524
  # historically we used _generate_video_content_with_inference_endpoints,
525
  # which offers better performance and reliability, but costs were spinning out of control
526
+ return await generate_video_content_with_inference_endpoints(
527
+ self.endpoint_manager,
528
  prompt=prompt,
529
  negative_prompt=options.get('negative_prompt', NEGATIVE_PROMPT),
530
  width=width,
 
536
  options=options,
537
  user_role=user_role
538
  )
 
 
 
 
 
 
539
 
540
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
541
  """Process and broadcast a chat message"""
542
  video_id = data.get('videoId')
 
543
 
 
 
 
 
 
 
 
 
544
  # Add chat message to event history
545
+ if video_id:
546
+ self._add_event(video_id, {
547
+ "time": datetime.datetime.utcnow().isoformat() + "Z",
548
+ "event": "new_chat_message",
549
+ "username": data.get('username', 'Anonymous'),
550
+ "data": data.get('content', '')
551
+ })
 
 
 
 
 
 
 
552
 
553
+ return await self.chat_manager.handle_chat_message(data, ws)
 
 
 
 
 
554
 
555
  async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
556
  """Handle a request to join a chat room"""
557
+ return await self.chat_manager.handle_join_chat(data, ws)
 
 
 
 
 
 
558
 
559
  async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
560
  """Handle a request to leave a chat room"""
561
+ return await self.chat_manager.handle_leave_chat(data, ws)
 
 
 
 
 
 
api_metrics.py → server/api_metrics.py RENAMED
File without changes
api_session.py → server/api_session.py RENAMED
@@ -5,9 +5,10 @@ from aiohttp import web, WSMsgType
5
  import json
6
  import time
7
  import datetime
8
- from api_core import VideoGenerationAPI
 
9
 
10
- logger = logging.getLogger(__name__)
11
 
12
  class UserSession:
13
  """
@@ -50,13 +51,14 @@ class UserSession:
50
  async def start(self):
51
  """Start all the queue processors for this session"""
52
  # Start background tasks for handling different request types
 
53
  self.background_tasks = [
54
  asyncio.create_task(self._process_chat_queue()),
55
  asyncio.create_task(self._process_video_queue()),
56
  asyncio.create_task(self._process_search_queue()),
57
  asyncio.create_task(self._process_simulation_queue()) # New worker for simulation requests
58
  ]
59
- logger.info(f"Started session for user {self.user_id} with role {self.user_role}")
60
 
61
  async def stop(self):
62
  """Stop all background tasks for this session"""
@@ -114,88 +116,127 @@ class UserSession:
114
 
115
  async def _process_video_queue(self):
116
  """Process multiple video generation requests in parallel for this user"""
117
- from api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
118
-
119
- active_tasks = set()
120
- # Set a per-user concurrent limit based on role
121
- max_concurrent = len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
122
- if self.user_role == 'anon':
123
- max_concurrent = min(2, max_concurrent) # Limit anonymous users
124
- elif self.user_role == 'normal':
125
- max_concurrent = min(4, max_concurrent) # Standard users
126
- # Pro and admin can use all endpoints
127
-
128
- async def process_single_request(data):
129
  try:
130
- title = data.get('title', '')
131
- description = data.get('description', '')
132
- video_prompt_prefix = data.get('video_prompt_prefix', '')
133
- options = data.get('options', {})
134
-
135
- # Pass the user role to generate_video
136
- video_data = await self.shared_api.generate_video(
137
- title, description, video_prompt_prefix, options, self.user_role
138
- )
139
-
140
- result = {
141
- 'action': 'generate_video',
142
- 'requestId': data.get('requestId'),
143
- 'success': True,
144
- 'video': video_data,
145
- }
146
-
147
- await self.ws.send_json(result)
148
-
149
- # Update metrics
150
- self.request_counts['video'] += 1
151
- self.last_request_times['video'] = time.time()
152
-
153
- except Exception as e:
154
- logger.error(f"Error processing video request for user {self.user_id}: {e}")
155
  try:
156
- await self.ws.send_json({
157
- 'action': 'generate_video',
158
- 'requestId': data.get('requestId'),
159
- 'success': False,
160
- 'error': f'Video generation error: {str(e)}'
161
- })
162
- except Exception as send_error:
163
- logger.error(f"Error sending error response: {send_error}")
164
- finally:
165
- active_tasks.discard(asyncio.current_task())
166
-
167
- while True:
168
- # Clean up completed tasks
169
- active_tasks = {task for task in active_tasks if not task.done()}
170
 
171
- # Start new tasks if we have capacity
172
- while len(active_tasks) < max_concurrent:
 
 
 
 
 
 
 
 
 
173
  try:
174
- # Use try_get to avoid blocking if queue is empty
175
- data = await asyncio.wait_for(self.video_queue.get(), timeout=0.1)
 
 
 
 
 
176
 
177
- # Create and start new task
178
- task = asyncio.create_task(process_single_request(data))
179
- active_tasks.add(task)
 
 
 
 
 
 
180
 
181
- except asyncio.TimeoutError:
182
- # No items in queue, break inner loop
183
- break
184
  except Exception as e:
185
- logger.error(f"Error creating video generation task for user {self.user_id}: {e}")
186
- break
187
-
188
- # Wait a short time before checking queue again
189
- await asyncio.sleep(0.1)
 
 
 
 
 
 
 
 
190
 
191
- # Handle any completed tasks' errors
192
- for task in list(active_tasks):
193
- if task.done():
 
 
 
 
 
 
194
  try:
195
- await task
 
 
 
 
 
 
 
 
 
 
 
 
196
  except Exception as e:
197
- logger.error(f"Task failed with error for user {self.user_id}: {e}")
198
- active_tasks.discard(task)
 
 
 
 
 
 
199
 
200
  async def _process_search_queue(self):
201
  """Medium priority queue for search operations"""
 
5
  import json
6
  import time
7
  import datetime
8
+ from .api_core import VideoGenerationAPI
9
+ from .logging_utils import get_logger
10
 
11
+ logger = get_logger(__name__)
12
 
13
  class UserSession:
14
  """
 
51
  async def start(self):
52
  """Start all the queue processors for this session"""
53
  # Start background tasks for handling different request types
54
+ logger.info(f"Creating background tasks for user {self.user_id}")
55
  self.background_tasks = [
56
  asyncio.create_task(self._process_chat_queue()),
57
  asyncio.create_task(self._process_video_queue()),
58
  asyncio.create_task(self._process_search_queue()),
59
  asyncio.create_task(self._process_simulation_queue()) # New worker for simulation requests
60
  ]
61
+ logger.info(f"Started session for user {self.user_id} with role {self.user_role}, created {len(self.background_tasks)} background tasks")
62
 
63
  async def stop(self):
64
  """Stop all background tasks for this session"""
 
116
 
117
  async def _process_video_queue(self):
118
  """Process multiple video generation requests in parallel for this user"""
119
+ try:
 
 
 
 
 
 
 
 
 
 
 
120
  try:
121
+ from .api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
122
+ except ImportError:
 
 
 
 
 
 
123
  try:
124
+ from server.api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
125
+ except ImportError:
126
+ logger.error(f"Failed to import VIDEO_ROUND_ROBIN_ENDPOINT_URLS for user {self.user_id}")
127
+ return
 
 
 
 
 
 
 
 
 
 
128
 
129
+ active_tasks = set()
130
+ # Set a per-user concurrent limit based on role
131
+ max_concurrent = len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
132
+ if self.user_role == 'anon':
133
+ max_concurrent = min(2, max_concurrent) # Limit anonymous users
134
+ elif self.user_role == 'normal':
135
+ max_concurrent = min(4, max_concurrent) # Standard users
136
+ # Pro and admin can use all endpoints
137
+
138
+ async def process_single_request(data):
139
+ request_id = data.get('requestId', 'unknown')
140
  try:
141
+ title = data.get('title', '')
142
+ description = data.get('description', '')
143
+ video_prompt_prefix = data.get('video_prompt_prefix', '')
144
+ options = data.get('options', {})
145
+
146
+ #logger.info(f"Starting video generation for user {self.user_id}: title='{title[:50]}...', role={self.user_role}")
147
+ start_time = time.time()
148
 
149
+ # Pass the user role to generate_video
150
+ video_data = await self.shared_api.generate_video(
151
+ title, description, video_prompt_prefix, options, self.user_role
152
+ )
153
+
154
+ generation_time = time.time() - start_time
155
+ logger.info(f"generated clip in {generation_time:.2f}s (len: {len(video_data) if video_data else 0})")
156
+
157
+ result = {
158
+ 'action': 'generate_video',
159
+ 'requestId': data.get('requestId'),
160
+ 'success': True,
161
+ 'video': video_data,
162
+ }
163
+
164
+ #logger.info(f"Sending video generation response to user {self.user_id}")
165
+ await self.ws.send_json(result)
166
+
167
+ # Update metrics
168
+ self.request_counts['video'] += 1
169
+ self.last_request_times['video'] = time.time()
170
 
 
 
 
171
  except Exception as e:
172
+ logger.error(f"Error processing video request for user {self.user_id}: {e}")
173
+ try:
174
+ logger.info(f"Sending error response to user {self.user_id}")
175
+ await self.ws.send_json({
176
+ 'action': 'generate_video',
177
+ 'requestId': data.get('requestId'),
178
+ 'success': False,
179
+ 'error': f'Video generation error: {str(e)}'
180
+ })
181
+ except Exception as send_error:
182
+ logger.error(f"Error sending error response: {send_error}")
183
+ finally:
184
+ active_tasks.discard(asyncio.current_task())
185
 
186
+ logger.info(f"Video queue processor started for user {self.user_id} with max_concurrent={max_concurrent}")
187
+
188
+ while True:
189
+ # Clean up completed tasks
190
+ active_tasks = {task for task in active_tasks if not task.done()}
191
+
192
+ # Log queue processing activity every few iterations
193
+ if hasattr(self, '_queue_debug_counter'):
194
+ self._queue_debug_counter += 1
195
+ else:
196
+ self._queue_debug_counter = 1
197
+
198
+ if self._queue_debug_counter % 50 == 0: # Log every 5 seconds (50 * 0.1s)
199
+ queue_size = self.video_queue.qsize()
200
+ # let's hide this log, it is too verbose
201
+ #logger.info(f"Video queue processor heartbeat for user {self.user_id}: queue_size={queue_size}, active_tasks={len(active_tasks)}/{max_concurrent}")
202
+
203
+ # Start new tasks if we have capacity
204
+ while len(active_tasks) < max_concurrent:
205
  try:
206
+ # Use try_get to avoid blocking if queue is empty
207
+ data = await asyncio.wait_for(self.video_queue.get(), timeout=0.1)
208
+
209
+ request_id = data.get('requestId', 'unknown')
210
+ #logger.info(f"[{request_id}] Picked up video request from queue for user {self.user_id}, creating task (active: {len(active_tasks)}/{max_concurrent})")
211
+
212
+ # Create and start new task
213
+ task = asyncio.create_task(process_single_request(data))
214
+ active_tasks.add(task)
215
+
216
+ except asyncio.TimeoutError:
217
+ # No items in queue, break inner loop
218
+ break
219
  except Exception as e:
220
+ logger.error(f"Error creating video generation task for user {self.user_id}: {e}")
221
+ break
222
+
223
+ # Wait a short time before checking queue again
224
+ await asyncio.sleep(0.1)
225
+
226
+ # Handle any completed tasks' errors
227
+ for task in list(active_tasks):
228
+ if task.done():
229
+ try:
230
+ await task
231
+ except Exception as e:
232
+ logger.error(f"Task failed with error for user {self.user_id}: {e}")
233
+ active_tasks.discard(task)
234
+
235
+ except Exception as e:
236
+ logger.error(f"Video queue processor crashed for user {self.user_id}: {e}")
237
+ import traceback
238
+ logger.error(f"Video queue processor traceback: {traceback.format_exc()}")
239
+ raise # Re-raise to ensure the error is visible
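Stripped of logging and session state, the loop above is a bounded-concurrency queue drain. A self-contained sketch of that pattern (names are illustrative; the real worker uses the per-role max_concurrent computed earlier):

import asyncio

async def drain(queue: asyncio.Queue, handler, max_concurrent: int = 2) -> None:
    active: set = set()
    while True:
        active = {t for t in active if not t.done()}
        while len(active) < max_concurrent:
            try:
                item = await asyncio.wait_for(queue.get(), timeout=0.1)
            except asyncio.TimeoutError:
                break  # queue momentarily empty
            active.add(asyncio.create_task(handler(item)))
        await asyncio.sleep(0.1)  # breathe before the next pass

async def demo() -> None:
    q: asyncio.Queue = asyncio.Queue()
    for i in range(3):
        q.put_nowait(i)

    async def handle(i: int) -> None:
        await asyncio.sleep(0.2)
        print("done", i)

    worker = asyncio.create_task(drain(q, handle))
    await asyncio.sleep(1)
    worker.cancel()
    try:
        await worker
    except asyncio.CancelledError:
        pass

asyncio.run(demo())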
240
 
241
  async def _process_search_queue(self):
242
  """Medium priority queue for search operations"""
server/chat.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
1
+ """
2
+ Chat-related functionality for video interactions.
3
+ """
4
+ import datetime
5
+ import logging
6
+ from collections import defaultdict
7
+ from typing import Dict, List, Any
8
+ from aiohttp import web
9
+ from .models import ChatRoom
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class ChatManager:
15
+ """Manages multiple chat rooms for different videos."""
16
+
17
+ def __init__(self):
18
+ self.chat_rooms = defaultdict(ChatRoom)
19
+
20
+ async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
21
+ """Process and broadcast a chat message"""
22
+ video_id = data.get('videoId')
23
+ request_id = data.get('requestId')
24
+
25
+ if not video_id:
26
+ return {
27
+ 'action': 'chat_message',
28
+ 'requestId': request_id,
29
+ 'success': False,
30
+ 'error': 'No video ID provided'
31
+ }
32
+
33
+ room = self.chat_rooms[video_id]
34
+ message_data = {k: v for k, v in data.items() if k != '_ws'}
35
+ room.add_message(message_data)
36
+
37
+ for client in room.connected_clients:
38
+ if client != ws:
39
+ try:
40
+ await client.send_json({
41
+ 'action': 'chat_message',
42
+ 'broadcast': True,
43
+ **message_data
44
+ })
45
+ except Exception as e:
46
+ logger.error(f"Failed to broadcast to client: {e}")
47
+ room.connected_clients.remove(client)
48
+
49
+ return {
50
+ 'action': 'chat_message',
51
+ 'requestId': request_id,
52
+ 'success': True,
53
+ 'message': message_data
54
+ }
55
+
56
+ async def handle_join_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
57
+ """Handle a request to join a chat room"""
58
+ video_id = data.get('videoId')
59
+ request_id = data.get('requestId')
60
+
61
+ if not video_id:
62
+ return {
63
+ 'action': 'join_chat',
64
+ 'requestId': request_id,
65
+ 'success': False,
66
+ 'error': 'No video ID provided'
67
+ }
68
+
69
+ room = self.chat_rooms[video_id]
70
+ room.connected_clients.add(ws)
71
+ recent_messages = room.get_recent_messages()
72
+
73
+ return {
74
+ 'action': 'join_chat',
75
+ 'requestId': request_id,
76
+ 'success': True,
77
+ 'messages': recent_messages
78
+ }
79
+
80
+ async def handle_leave_chat(self, data: dict, ws: web.WebSocketResponse) -> dict:
81
+ """Handle a request to leave a chat room"""
82
+ video_id = data.get('videoId')
83
+ request_id = data.get('requestId')
84
+
85
+ if not video_id:
86
+ return {
87
+ 'action': 'leave_chat',
88
+ 'requestId': request_id,
89
+ 'success': False,
90
+ 'error': 'No video ID provided'
91
+ }
92
+
93
+ room = self.chat_rooms[video_id]
94
+ if ws in room.connected_clients:
95
+ room.connected_clients.remove(ws)
96
+
97
+ return {
98
+ 'action': 'leave_chat',
99
+ 'requestId': request_id,
100
+ 'success': True
101
+ }
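A quick way to see the new module in action is to drive it with a fake socket. This sketch assumes the server/ package from this commit is importable and only illustrates the join/broadcast flow; FakeWebSocket is a made-up stand-in for aiohttp's WebSocketResponse:

import asyncio
from server.chat import ChatManager

class FakeWebSocket:
    async def send_json(self, payload):
        print("broadcast:", payload)

async def main() -> None:
    manager = ChatManager()
    viewer_a, viewer_b = FakeWebSocket(), FakeWebSocket()

    # Both viewers join the same room, keyed by video id.
    await manager.handle_join_chat({'videoId': 'vid-1', 'requestId': 'r1'}, viewer_a)
    await manager.handle_join_chat({'videoId': 'vid-1', 'requestId': 'r2'}, viewer_b)

    # A message from viewer A is stored and broadcast to viewer B only.
    result = await manager.handle_chat_message(
        {'videoId': 'vid-1', 'requestId': 'r3', 'username': 'demo', 'content': 'hi'},
        viewer_a,
    )
    print(result['success'], len(manager.chat_rooms['vid-1'].messages))

asyncio.run(main())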
server/config_utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
1
+ """
2
+ Configuration utilities for user role-based settings.
3
+ """
4
+ from typing import Any, Dict, Optional
5
+ from .models import UserRole
6
+ from .api_config import (
7
+ CONFIG_FOR_ADMIN_HF_USERS,
8
+ CONFIG_FOR_PRO_HF_USERS,
9
+ CONFIG_FOR_STANDARD_HF_USERS,
10
+ CONFIG_FOR_ANONYMOUS_USERS
11
+ )
12
+
13
+
14
+ def get_config_value(role: UserRole, field: str, options: Optional[Dict[str, Any]] = None) -> Any:
15
+ """
16
+ Get the appropriate config value for a user role.
17
+
18
+ Args:
19
+ role: The user role ('anon', 'normal', 'pro', 'admin')
20
+ field: The config field name to retrieve
21
+ options: Optional user-provided options that may override defaults
22
+
23
+ Returns:
24
+ The config value appropriate for the user's role with respect to
25
+ min/max boundaries and user overrides.
26
+ """
27
+ # Select the appropriate config based on user role
28
+ if role == 'admin':
29
+ config = CONFIG_FOR_ADMIN_HF_USERS
30
+ elif role == 'pro':
31
+ config = CONFIG_FOR_PRO_HF_USERS
32
+ elif role == 'normal':
33
+ config = CONFIG_FOR_STANDARD_HF_USERS
34
+ else: # Anonymous users
35
+ config = CONFIG_FOR_ANONYMOUS_USERS
36
+
37
+ # Get the default value for this field from the config
38
+ default_value = config.get(f"default_{field}", None)
39
+
40
+ # For fields that have min/max bounds
41
+ min_field = f"min_{field}"
42
+ max_field = f"max_{field}"
43
+
44
+ # Check if min/max constraints exist for this field
45
+ has_constraints = min_field in config or max_field in config
46
+
47
+ if not has_constraints:
48
+ # For fields without constraints, just return the value from config
49
+ return default_value
50
+
51
+ # Get min and max values from config (if they exist)
52
+ min_value = config.get(min_field, None)
53
+ max_value = config.get(max_field, None)
54
+
55
+ # If user provided options with this field
56
+ if options and field in options:
57
+ user_value = options[field]
58
+
59
+ # Apply constraints if they exist
60
+ if min_value is not None and user_value < min_value:
61
+ return min_value
62
+ if max_value is not None and user_value > max_value:
63
+ return max_value
64
+
65
+ # If within bounds, use the user's value
66
+ return user_value
67
+
68
+ # If no user value, return the default
69
+ return default_value
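A minimal usage sketch of get_config_value; it assumes the role configs in api_config define default_num_frames / min_num_frames / max_num_frames keys, and the field name num_frames is only an illustration:

from server.config_utils import get_config_value

# User asks for more frames than an anonymous account allows: the value is
# clamped to max_num_frames for the 'anon' role (if that key exists).
clamped = get_config_value('anon', 'num_frames', options={'num_frames': 10_000})

# No user override: returns default_num_frames for the 'pro' role, or None.
default = get_config_value('pro', 'num_frames')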
server/endpoint_manager.py ADDED
@@ -0,0 +1,109 @@
1
+ """
2
+ Endpoint management for video generation services.
3
+ """
4
+ import time
5
+ import datetime
6
+ import logging
7
+ from asyncio import Lock
8
+ from contextlib import asynccontextmanager
9
+ from typing import List
10
+ from .models import Endpoint
11
+ from .api_config import VIDEO_ROUND_ROBIN_ENDPOINT_URLS
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class EndpointManager:
17
+ """Manages multiple video generation endpoints with load balancing and error handling."""
18
+
19
+ def __init__(self):
20
+ self.endpoints: List[Endpoint] = []
21
+ self.lock = Lock()
22
+ self.initialize_endpoints()
23
+ self.last_used_index = -1 # Track the last used endpoint for round-robin
24
+
25
+ def initialize_endpoints(self):
26
+ """Initialize the list of endpoints"""
27
+ for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
28
+ endpoint = Endpoint(id=i + 1, url=url)
29
+ self.endpoints.append(endpoint)
30
+
31
+ def _get_next_free_endpoint(self):
32
+ """Get the next available non-busy endpoint, or oldest endpoint if all are busy"""
33
+ current_time = time.time()
34
+
35
+ # First priority: Get any non-busy and non-error endpoint
36
+ free_endpoints = [
37
+ ep for ep in self.endpoints
38
+ if not ep.busy and current_time > ep.error_until
39
+ ]
40
+
41
+ if free_endpoints:
42
+ # Return the least recently used free endpoint
43
+ return min(free_endpoints, key=lambda ep: ep.last_used)
44
+
45
+ # Second priority: If all busy/error, use round-robin but skip error endpoints
46
+ tried_count = 0
47
+ next_index = self.last_used_index
48
+
49
+ while tried_count < len(self.endpoints):
50
+ next_index = (next_index + 1) % len(self.endpoints)
51
+ tried_count += 1
52
+
53
+ # If endpoint is not in error state, use it
54
+ if current_time > self.endpoints[next_index].error_until:
55
+ self.last_used_index = next_index
56
+ return self.endpoints[next_index]
57
+
58
+ # If all endpoints are in error state, use the one with earliest error expiry
59
+ self.last_used_index = next_index
60
+ return min(self.endpoints, key=lambda ep: ep.error_until)
61
+
62
+ @asynccontextmanager
63
+ async def get_endpoint(self, max_wait_time: int = 10):
64
+ """Get the next available endpoint using a context manager"""
65
+ start_time = time.time()
66
+ endpoint = None
67
+
68
+ try:
69
+ while True:
70
+ if time.time() - start_time > max_wait_time:
71
+ raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
72
+
73
+ async with self.lock:
74
+ # Get the next available endpoint using our selection strategy
75
+ endpoint = self._get_next_free_endpoint()
76
+
77
+ # Mark it as busy
78
+ endpoint.busy = True
79
+ endpoint.last_used = time.time()
80
+ break
81
+
82
+ yield endpoint
83
+
84
+ finally:
85
+ if endpoint:
86
+ async with self.lock:
87
+ endpoint.busy = False
88
+ endpoint.last_used = time.time()
89
+
90
+ async def mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
91
+ """Mark an endpoint as being in error state with exponential backoff"""
92
+ async with self.lock:
93
+ endpoint.error_count += 1
94
+
95
+ # Calculate backoff time exponentially based on error count
96
+ # Start with 15 seconds, then 30, 60, etc. up to a max of 5 minutes
97
+ # Using shorter backoffs since generation should be fast
98
+ backoff_seconds = min(15 * (2 ** (endpoint.error_count - 1)), 300)
99
+
100
+ # Add extra backoff for timeouts which are more indicative of serious issues
101
+ if is_timeout:
102
+ backoff_seconds *= 2
103
+
104
+ endpoint.error_until = time.time() + backoff_seconds
105
+
106
+ logger.warning(
107
+ f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
108
+ f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
109
+ )
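A hedged usage sketch of the manager; it assumes VIDEO_ROUND_ROBIN_ENDPOINT_URLS contains at least one URL, and it only prints instead of performing a real HTTP call:

import asyncio
from server.endpoint_manager import EndpointManager

async def render_once() -> None:
    manager = EndpointManager()
    try:
        # Acquires the least recently used free endpoint; released automatically on exit.
        async with manager.get_endpoint(max_wait_time=10) as endpoint:
            print(f"would POST to endpoint {endpoint.id}: {endpoint.url}")
            # On a failed call you would apply exponential backoff:
            # await manager.mark_endpoint_error(endpoint, is_timeout=False)
    except TimeoutError:
        print("no endpoint became available in time")

asyncio.run(render_once())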
server/llm_utils.py ADDED
@@ -0,0 +1,297 @@
1
+ """
2
+ LLM-related utilities, templates, and text generation functions.
3
+ """
4
+ import asyncio
5
+ import logging
6
+ from typing import Optional, Dict, Any
7
+ from huggingface_hub import InferenceClient
8
+ from .api_config import HF_TOKEN, TEXT_MODEL
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # LLM prompt templates
14
+ SEARCH_VIDEO_PROMPT_TEMPLATE = """# Instruction
15
+ Your response MUST be a YAML object containing a title and description, consistent with what we can find on a video sharing platform.
16
+ Format your YAML response with only those fields: "title" (a short string) and "description" (string caption of the scene). Do not add any other field.
17
+ In the description field, describe in a very synthetic way the visuals of the first shot (first scene), e.g. "<STYLE>, medium close-up shot, high angle view. In the foreground a <OPTIONAL AGE> <OPTIONAL GENDER> <CHARACTERS> <ACTIONS>. In the background <DESCRIBE LOCATION, BACKGROUND CHARACTERS, OBJECTS ETC>. The scene is lit by <LIGHTING> <WEATHER>". This is just an example! You MUST replace the <TAGS>!!
18
+ Don't forget to replace <STYLE> etc. with the actual fields!!
19
+ For the style, be creative, for instance you can use anything like a "documentary footage", "japanese animation", "movie scene", "tv series", "tv show", "security footage" etc.
20
+ If the user asks for something specific, e.g. "movie screencap", "movie scene", "documentary footage" or "animation", use that as the style.
21
+ Keep it minimalist but still descriptive: don't use bullet points, use simple words, and go to the essentials to describe the style (cinematic, documentary footage, 3D rendering..), camera modes and angles, characters, age, gender, action, location, lighting, country, costume, time, weather, textures, color palette, etc. Write about 80 words, and use between 2 and 3 sentences.
22
+ The most important part is to describe the actions and movements in the scene, so don't forget that!
23
+ Don't describe sound, so never say things like "atmospheric music playing in the background".
24
+ Instead, describe the visual elements we can see in the background; be precise (if there are cars, objects, people, bricks, birds, clouds, trees, leaves or grass, then say so).
25
+ Make the result unique and different from previous search results. ONLY RETURN YAML AND WITH ENGLISH CONTENT, NOT CHINESE - DO NOT ADD ANY OTHER COMMENT!
26
+
27
+ # Context
28
+ This is attempt {current_attempt}.
29
+
30
+ # Input
31
+ Describe the first scene/shot for: "{query}".
32
+
33
+ # Output
34
+
35
+ ```yaml
36
+ title: \""""
37
+
38
+ GENERATE_CAPTION_PROMPT_TEMPLATE = """Generate a detailed story for a video named: "{title}"
39
+ Visual description of the video: {description}.
40
+ Instructions: Write the story summary, including the plot, action, what should happen.
41
+ Make it around 200-300 words long.
42
+ A video can be anything from a tutorial, webcam, trailer, movie, live stream etc."""
43
+
44
+ SIMULATE_VIDEO_FIRST_PROMPT_TEMPLATE = """You are tasked with evolving the narrative for a video titled: "{original_title}"
45
+
46
+ Original description:
47
+ {original_description}
48
+ {chat_section}
49
+
50
+ Instructions:
51
+ 1. Imagine the next logical scene or development that would follow the current description.
52
+ 2. Consider the video context and recent events
53
+ 3. Create a natural progression from previous clips
54
+ 4. Take user suggestions (chat messages) into account when composing the scene
55
+ 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
56
+ 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
57
+ 7. Return ONLY the caption text, no additional formatting or explanation
58
+ 8. Write in English, about 200 words.
59
+ 9. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. from the previous description, when it makes sense).
60
+ 10. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
61
+ 11. Please write in the same style as the original description, by keeping things brief etc.
62
+
63
+ Remember to obey what users said in the chat history!!
64
+
65
+ Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
66
+
67
+ SIMULATE_VIDEO_CONTINUE_PROMPT_TEMPLATE = """You are tasked with continuing to evolve the narrative for a video titled: "{original_title}"
68
+
69
+ Original description:
70
+ {original_description}
71
+
72
+ Condensed history of scenes so far:
73
+ {condensed_history}
74
+
75
+ Current description (most recent scene):
76
+ {current_description}
77
+ {chat_section}
78
+
79
+ Instructions:
80
+ 1. Imagine the next logical scene or development that would follow the current description.
81
+ 2. Consider the video context and recent events
82
+ 3. Create a natural progression from previous clips
83
+ 4. Take user suggestions (chat messages) into account when composing the scene
84
+ 5. IMPORTANT: if viewers have shared messages, consider their input in priority to guide your story, and incorporate relevant suggestions or reactions into your narrative evolution.
85
+ 6. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
86
+ 7. Return ONLY the caption text, no additional formatting or explanation
87
+ 8. Write in English, about 200 words.
88
+ 9. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. from the previous description, when it makes sense).
89
+ 10. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
90
+ 11. Please write in the same style as the original description, by keeping things brief etc.
91
+
92
+ Remember to obey what users said in the chat history!!
93
+
94
+ Now, you must write down the new scene description (don't write a long story! write a synthetic description!):"""
95
+
96
+ GENERATE_CLIP_PROMPT_TEMPLATE = """# Context and task
97
+ Please write the caption for a new clip.
98
+
99
+ # Instructions
100
+ 1. Consider the video context and recent events
101
+ 2. Create a natural progression from previous clips
102
+ 3. Take user suggestions (chat messages) into account when composing the scene
103
+ 4. Don't generate hateful, political, violent or sexual content
104
+ 5. Keep visual consistency with previous clips (in most cases you should repeat the same exact description of the location, characters etc but only change a few elements. If this is a webcam scenario, don't touch the camera orientation or focus)
105
+ 6. Return ONLY the caption text, no additional formatting or explanation
106
+ 7. Write in English, about 200 words.
107
+ 8. Keep the visual style consistent, and the content as well (repeat the style, characters, locations, appearance etc. across scenes, when it makes sense).
108
+ 9. Your caption must describe the visual elements of the scene in detail, including: camera angle and focus, people's appearance, age, look, costumes, clothes, the location's visual characteristics and geometry, lighting, action, objects, weather and textures.
109
+
110
+ # Examples
111
+ Here is a demo scenario, with fake data:
112
+ {{"time": "2024-11-29T13:36:15Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
113
+ {{"time": "2024-11-29T13:36:20Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "hi"}}
114
+ {{"time": "2024-11-29T13:36:25Z", "event": "new_chat_message", "username": "MonkeyLover89", "data": "more squirrels plz"}}
115
+ {{"time": "2024-11-29T13:36:26Z", "event": "new_stream_clip", "caption": "webcam view of a beautiful park, a lot of squirrels are playing in the lush grass, blablabla etc... (rest omitted for brevity)"}}
116
+
117
+ # Real scenario and data
118
+
119
+ We are inside a video titled "{title}"
120
+ The video is described by: "{description}".
121
+ Here is a summary of the {event_count} most recent events:
122
+ {events_json}
123
+
124
+ # Your response
125
+ Your caption:"""
126
+
127
+
128
+ def get_inference_client(llm_config: Optional[dict] = None) -> InferenceClient:
129
+ """
130
+ Get an InferenceClient configured with the provided LLM settings.
131
+
132
+ Priority order for API keys:
133
+ 1. Provider-specific API key (if provided)
134
+ 2. User's HF token (if provided)
135
+ 3. Server's HF token (only for built-in provider)
136
+ 4. Raise exception if no valid key is available
137
+ """
138
+
139
+ if not llm_config:
140
+ if HF_TOKEN:
141
+ return InferenceClient(
142
+ model=TEXT_MODEL,
143
+ token=HF_TOKEN
144
+ )
145
+ else:
146
+ raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
147
+
148
+ provider = llm_config.get('provider', '').lower()
149
+ #logger.info(f"provider = {provider}")
150
+
151
+ # If no provider or model specified, use default
152
+ if not provider or provider == 'built-in':
153
+ if HF_TOKEN:
154
+ return InferenceClient(
155
+ model=TEXT_MODEL,
156
+ token=HF_TOKEN
157
+ )
158
+ else:
159
+ raise ValueError("Built-in provider is not available. Server HF_TOKEN is not configured.")
160
+
161
+ model = llm_config.get('model', '')
162
+ user_provider_api_key = llm_config.get('api_key', '') # Provider-specific API key
163
+ user_hf_token = llm_config.get('hf_token', '') # User's HF token
164
+
165
+ try:
166
+ # Case 1: Use a provider with a provider-specific API key if available
167
+ # This mode is currently hidden in the Flutter UI (we don't ask for provider-specific keys yet)
168
+ # but it is implemented here so that we don't forget it later
169
+ if user_provider_api_key:
170
+ return InferenceClient(
171
+ provider=provider,
172
+ model=model,
173
+ api_key=user_provider_api_key
174
+ )
175
+
176
+ # Case 2: Use a provider with user's HF token if available
177
+ elif user_hf_token:
178
+ return InferenceClient(
179
+ provider=provider,
180
+ model=model,
181
+ token=user_hf_token
182
+ )
183
+ else:
184
+ raise ValueError(f"No API key provided for provider '{provider}'. Please provide either a valid {provider} API key or your Hugging Face API key.")
185
+
186
+ except ValueError:
187
+ # Re-raise ValueError for missing API keys
188
+ raise
189
+ except Exception as e:
190
+ logger.error(f"Error creating InferenceClient for provider '{provider}' and model '{model}': {e}")
191
+ # Re-raise all other exceptions
192
+ raise
193
+
194
+
195
+ async def generate_text(prompt: str, llm_config: Optional[dict] = None,
196
+ max_new_tokens: int = 200, temperature: float = 0.7,
197
+ model_override: Optional[str] = None) -> str:
198
+ """
199
+ Helper method to generate text using the appropriate client and configuration.
200
+ Tries chat_completion first (modern standard), falls back to text_generation.
201
+
202
+ Args:
203
+ prompt: The prompt to generate text from
204
+ llm_config: Optional LLM configuration dict
205
+ max_new_tokens: Maximum number of new tokens to generate
206
+ temperature: Temperature for generation
207
+ model_override: Optional model to use instead of the one in llm_config
208
+
209
+ Returns:
210
+ Generated text string
211
+ """
212
+ # Add game master prompt if provided
213
+ if llm_config and llm_config.get('game_master_prompt'):
214
+ game_master_prompt = llm_config['game_master_prompt'].strip()
215
+ if game_master_prompt:
216
+ prompt = f"{game_master_prompt}\n\n{prompt}"
217
+
218
+ # Get the appropriate client
219
+ client = get_inference_client(llm_config)
220
+
221
+ # Determine the model to use
222
+ if model_override:
223
+ model_to_use = model_override
224
+ elif llm_config:
225
+ model_to_use = llm_config.get('model', TEXT_MODEL)
226
+ else:
227
+ model_to_use = TEXT_MODEL
228
+
229
+ # Try chat_completion first (modern standard, more widely supported)
230
+ try:
231
+ messages = [{"role": "user", "content": prompt}]
232
+
233
+ if llm_config and llm_config.get('provider') != 'huggingface':
234
+ # For third-party providers
235
+ completion = await asyncio.get_event_loop().run_in_executor(
236
+ None,
237
+ lambda: client.chat.completions.create(
238
+ messages=messages,
239
+ max_tokens=max_new_tokens,
240
+ temperature=temperature
241
+ )
242
+ )
243
+ else:
244
+ # For HuggingFace models, specify the model
245
+ completion = await asyncio.get_event_loop().run_in_executor(
246
+ None,
247
+ lambda: client.chat.completions.create(
248
+ model=model_to_use,
249
+ messages=messages,
250
+ max_tokens=max_new_tokens,
251
+ temperature=temperature
252
+ )
253
+ )
254
+
255
+ # Extract the generated text from the chat completion response
256
+ return completion.choices[0].message.content
257
+
258
+ except Exception as e:
259
+ error_message = str(e).lower()
260
+ # Check if the error is related to task compatibility or API not supported
261
+ if ("not supported for task" in error_message or
262
+ "conversational" in error_message or
263
+ "chat" in error_message):
264
+ logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
265
+
266
+ # Fall back to text_generation API
267
+ try:
268
+ if llm_config and llm_config.get('provider') != 'huggingface':
269
+ # For third-party providers
270
+ response = await asyncio.get_event_loop().run_in_executor(
271
+ None,
272
+ lambda: client.text_generation(
273
+ prompt,
274
+ max_new_tokens=max_new_tokens,
275
+ temperature=temperature
276
+ )
277
+ )
278
+ else:
279
+ # For HuggingFace models, specify the model
280
+ response = await asyncio.get_event_loop().run_in_executor(
281
+ None,
282
+ lambda: client.text_generation(
283
+ prompt,
284
+ model=model_to_use,
285
+ max_new_tokens=max_new_tokens,
286
+ temperature=temperature
287
+ )
288
+ )
289
+ return response
290
+
291
+ except Exception as text_error:
292
+ logger.error(f"Both chat_completion and text_generation failed: {text_error}")
293
+ raise text_error
294
+ else:
295
+ # Re-raise the original error if it's not a task compatibility issue
296
+ logger.error(f"chat_completion failed with non-compatibility error: {e}")
297
+ raise e
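A hedged usage sketch of generate_text: the first call exercises the built-in path (server HF_TOKEN plus TEXT_MODEL), and the commented llm_config shows the keys read by get_inference_client with placeholder values only:

import asyncio
from server.llm_utils import generate_text

async def main() -> None:
    # Built-in provider: requires the server's HF_TOKEN to be configured.
    caption = await generate_text(
        "Describe a quiet mountain lake at dawn.",
        max_new_tokens=120,
        temperature=0.7,
    )
    print(caption)

    # A user-supplied configuration would look roughly like this (placeholder values):
    # llm_config = {"provider": "novita", "model": "some-org/some-model", "hf_token": "hf_xxx"}
    # caption = await generate_text("...", llm_config=llm_config)

asyncio.run(main())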
server/logging_utils.py ADDED
@@ -0,0 +1,132 @@
1
+ """
2
+ Colored logging utilities for the TikSlop server.
3
+ """
4
+ import logging
5
+ import re
6
+
7
+ # ANSI color codes
8
+ class Colors:
9
+ RESET = '\033[0m'
10
+ BOLD = '\033[1m'
11
+ DIM = '\033[2m'
12
+
13
+ # Foreground colors
14
+ BLACK = '\033[30m'
15
+ RED = '\033[31m'
16
+ GREEN = '\033[32m'
17
+ YELLOW = '\033[33m'
18
+ BLUE = '\033[34m'
19
+ MAGENTA = '\033[35m'
20
+ CYAN = '\033[36m'
21
+ WHITE = '\033[37m'
22
+
23
+ # Bright colors
24
+ BRIGHT_BLACK = '\033[90m'
25
+ BRIGHT_RED = '\033[91m'
26
+ BRIGHT_GREEN = '\033[92m'
27
+ BRIGHT_YELLOW = '\033[93m'
28
+ BRIGHT_BLUE = '\033[94m'
29
+ BRIGHT_MAGENTA = '\033[95m'
30
+ BRIGHT_CYAN = '\033[96m'
31
+ BRIGHT_WHITE = '\033[97m'
32
+
33
+ # Background colors
34
+ BG_BLACK = '\033[40m'
35
+ BG_RED = '\033[41m'
36
+ BG_GREEN = '\033[42m'
37
+ BG_YELLOW = '\033[43m'
38
+ BG_BLUE = '\033[44m'
39
+ BG_MAGENTA = '\033[45m'
40
+ BG_CYAN = '\033[46m'
41
+ BG_WHITE = '\033[47m'
42
+
43
+ class ColoredFormatter(logging.Formatter):
44
+ """Custom formatter with colors and patterns"""
45
+
46
+ def __init__(self):
47
+ super().__init__()
48
+
49
+ def format(self, record):
50
+ # Color mapping for log levels
51
+ level_colors = {
52
+ 'DEBUG': Colors.BRIGHT_BLACK,
53
+ 'INFO': Colors.BRIGHT_CYAN,
54
+ 'WARNING': Colors.BRIGHT_YELLOW,
55
+ 'ERROR': Colors.BRIGHT_RED,
56
+ 'CRITICAL': Colors.BRIGHT_MAGENTA + Colors.BOLD
57
+ }
58
+
59
+ # Format timestamp
60
+ timestamp = f"{Colors.DIM}{self.formatTime(record, '%H:%M:%S.%f')[:-3]}{Colors.RESET}"
61
+
62
+ # Format level with color
63
+ level_color = level_colors.get(record.levelname, Colors.WHITE)
64
+ level = f"{level_color}{record.levelname:>7}{Colors.RESET}"
65
+
66
+ # Format logger name
67
+ logger_name = f"{Colors.BRIGHT_BLACK}[{record.name}]{Colors.RESET}"
68
+
69
+ # Format message with keyword highlighting
70
+ message = self.colorize_message(record.getMessage())
71
+
72
+ return f"{timestamp} {level} {logger_name} {message}"
73
+
74
+ def colorize_message(self, message):
75
+ """Add colors to specific keywords and patterns in the message"""
76
+
77
+ # Highlight request IDs in brackets (gray like logger names)
78
+ message = re.sub(r'\[([a-f0-9-]{36})\]', f'{Colors.BRIGHT_BLACK}[\\1]{Colors.RESET}', message)
79
+
80
+ # Highlight user IDs
81
+ message = re.sub(r'user ([a-zA-Z0-9-]+)', f'user {Colors.BRIGHT_BLUE}\\1{Colors.RESET}', message)
82
+
83
+ # Highlight actions
84
+ message = re.sub(r'(generate_video|search|simulate|join_chat|leave_chat|chat_message)',
85
+ f'{Colors.BRIGHT_YELLOW}\\1{Colors.RESET}', message)
86
+
87
+ # Highlight status keywords
88
+ message = re.sub(r'\b(success|successful|completed|connected|ready)\b',
89
+ f'{Colors.BRIGHT_GREEN}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
90
+
91
+ message = re.sub(r'\b(error|failed|timeout|exception)\b',
92
+ f'{Colors.BRIGHT_RED}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
93
+
94
+ message = re.sub(r'\b(warning|retry|reconnect)\b',
95
+ f'{Colors.BRIGHT_YELLOW}\\1{Colors.RESET}', message, flags=re.IGNORECASE)
96
+
97
+ # Highlight numbers (timing, counts, etc.) but not those inside UUIDs
98
+ message = re.sub(r'(?<![a-f0-9-])\b(\d+\.?\d*)(s|ms|chars|bytes)?\b(?![a-f0-9-])',
99
+ f'{Colors.BRIGHT_MAGENTA}\\1{Colors.CYAN}\\2{Colors.RESET}', message)
100
+
101
+ # Highlight roles
102
+ message = re.sub(r'\b(role|user_role)=([a-zA-Z]+)',
103
+ f'\\1={Colors.BRIGHT_CYAN}\\2{Colors.RESET}', message)
104
+
105
+ # Highlight titles in quotes
106
+ message = re.sub(r"title='([^']*)'", f"title='{Colors.GREEN}\\1{Colors.RESET}'", message)
107
+
108
+ return message
109
+
110
+ def setup_colored_logging():
111
+ """Set up colored logging for the entire application"""
112
+
113
+ # Configure logging with colors
114
+ logging.basicConfig(
115
+ level=logging.INFO,
116
+ handlers=[
117
+ logging.StreamHandler()
118
+ ]
119
+ )
120
+
121
+ # Set up colored formatter
122
+ handler = logging.StreamHandler()
123
+ handler.setFormatter(ColoredFormatter())
124
+
125
+ # Apply to root logger and clear default handlers
126
+ root_logger = logging.getLogger()
127
+ root_logger.handlers.clear()
128
+ root_logger.addHandler(handler)
129
+
130
+ def get_logger(name):
131
+ """Get a logger with the given name"""
132
+ return logging.getLogger(name)
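Typical usage mirrors the api.py entry point: call setup_colored_logging() once at startup, then fetch named loggers. The UUID and duration below are sample values chosen to trigger the highlighting patterns:

from server.logging_utils import setup_colored_logging, get_logger

setup_colored_logging()
logger = get_logger("server.demo")

# Request IDs in brackets, action names like "generate_video" and durations
# such as "2.31s" are colorized by ColoredFormatter.colorize_message().
logger.info("[8b1c2d3e-0000-4000-8000-000000000000] generate_video completed in 2.31s")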
server/models.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ Data models and dataclasses used throughout the application.
3
+ """
4
+ from dataclasses import dataclass
5
+ from typing import Literal, Set, List, Dict, Any
6
+
7
+
8
+ # User role type
9
+ UserRole = Literal['anon', 'normal', 'pro', 'admin']
10
+
11
+
12
+ @dataclass
13
+ class Endpoint:
14
+ """Represents a video generation endpoint."""
15
+ id: int
16
+ url: str
17
+ busy: bool = False
18
+ last_used: float = 0
19
+ error_count: int = 0
20
+ error_until: float = 0 # Timestamp until which this endpoint is considered in error state
21
+
22
+
23
+ class ChatRoom:
24
+ """Represents a chat room for a video."""
25
+ def __init__(self):
26
+ self.messages: List[Dict[str, Any]] = []
27
+ self.connected_clients: Set[Any] = set()
28
+ self.max_history: int = 100
29
+
30
+ def add_message(self, message: Dict[str, Any]) -> None:
31
+ """Add a message to the chat room history."""
32
+ self.messages.append(message)
33
+ if len(self.messages) > self.max_history:
34
+ self.messages.pop(0)
35
+
36
+ def get_recent_messages(self, limit: int = 50) -> List[Dict[str, Any]]:
37
+ """Get the most recent messages from the chat room."""
38
+ return self.messages[-limit:]
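A small sketch of these data models in use (the endpoint URL is a placeholder):

from server.models import ChatRoom, Endpoint

room = ChatRoom()
room.add_message({"username": "demo-user", "data": "more squirrels plz"})
print(room.get_recent_messages(limit=10))  # history is capped at max_history (100)

endpoint = Endpoint(id=1, url="https://example.endpoints.huggingface.cloud")
print(endpoint.busy, endpoint.error_count)  # False 0 until the manager touches it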
server/utils.py ADDED
@@ -0,0 +1,131 @@
1
+ """
2
+ Generic utility functions used across the application.
3
+ """
4
+ import random
5
+ import re
6
+
7
+
8
+ def generate_seed():
9
+ """Generate a random positive 32-bit integer seed."""
10
+ return random.randint(0, 2**32 - 1)
11
+
12
+
13
+ def sanitize_yaml_response(response_text: str) -> str:
14
+ """
15
+ Sanitize and format AI response into valid YAML.
16
+ Returns properly formatted YAML string.
17
+ """
18
+
19
+ # Pre-processing: Remove code block markers
20
+ if response_text.startswith("```yaml"):
21
+ # Remove the "```yaml" at the beginning and closing ```
22
+ response_text = response_text[7:] # Remove "```yaml" (7 characters)
23
+ if response_text.endswith("```"):
24
+ response_text = response_text[:-3] # Remove closing ```
25
+ response_text = response_text.strip()
26
+ elif response_text.startswith("```"):
27
+ # Remove the "```" at the beginning and closing ```
28
+ response_text = response_text[3:] # Remove opening ```
29
+ if response_text.endswith("```"):
30
+ response_text = response_text[:-3] # Remove closing ```
31
+ response_text = response_text.strip()
32
+
33
+ # Handle edge case where the LLM might have continued the prompt
34
+ # e.g., if the response starts with the incomplete prompt we provided
35
+ if response_text.startswith('title: \\"'):
36
+ # Remove the incomplete prompt prefix
37
+ response_text = response_text[9:].strip()
38
+
39
+ # Check if it already has a proper YAML structure
40
+ if not response_text.startswith(('title:', 'title :')):
41
+ # Only wrap with title if it doesn't already have one
42
+ # The sanitize function will handle escaping
43
+ response_text = f'title: {response_text}'
44
+
45
+ # Split on first occurrence of ``` to handle any remaining code blocks
46
+ response_text = response_text.split("```")[0]
47
+
48
+ # Remove any markdown code block indicators and YAML document markers
49
+ clean_text = re.sub(r'```yaml|```|---|\.\.\.$', '', response_text.strip())
50
+
51
+ # Handle the specific case where LLM duplicates 'title:' in the value
52
+ # e.g., title: "title: "Something"" -> title: "Something"
53
+ clean_text = re.sub(r'title:\s*"title:\s*"([^"]+)""?', r'title: "\1"', clean_text)
54
+ clean_text = re.sub(r'title:\s*\'title:\s*\'([^\']+)\'\'?', r'title: \'\1\'', clean_text)
55
+ clean_text = re.sub(r'title:\s*"title:\s*\'([^\']+)\'"?', r'title: "\1"', clean_text)
56
+ clean_text = re.sub(r'title:\s*\'title:\s*"([^"]+)"\'?', r'title: \'\1\'', clean_text)
57
+
58
+ # Also handle case where title appears twice without quotes
59
+ clean_text = re.sub(r'title:\s*title:\s*(.+)$', r'title: \1', clean_text, flags=re.MULTILINE)
60
+
61
+ # Split into lines and process each line
62
+ lines = clean_text.split('\n')
63
+ sanitized_lines = []
64
+ current_field = None
65
+
66
+ for line in lines:
67
+ stripped = line.strip()
68
+ if not stripped:
69
+ continue
70
+
71
+ # Handle field starts
72
+ if stripped.startswith('title:') or stripped.startswith('description:'):
73
+ # Ensure proper YAML format with space after colon and proper quoting
74
+ field_name = stripped.split(':', 1)[0]
75
+ field_value = stripped.split(':', 1)[1].strip()
76
+
77
+ # Remove outer quotes first
78
+ if (field_value.startswith('"') and field_value.endswith('"')) or \
79
+ (field_value.startswith("'") and field_value.endswith("'")):
80
+ field_value = field_value[1:-1]
81
+
82
+ # Check for nested title pattern again (in case it wasn't caught by regex)
83
+ if field_name == 'title' and field_value.lower().startswith('title:'):
84
+ # Remove the nested 'title:' prefix
85
+ field_value = field_value[6:].strip().strip('"\'')
86
+
87
+ # Escape any internal quotes
88
+ field_value = field_value.replace('"', '\\"')
89
+
90
+ # Always quote the value to ensure proper YAML formatting
91
+ field_value = f'"{field_value}"'
92
+
93
+ sanitized_lines.append(f"{field_name}: {field_value}")
94
+ current_field = field_name
95
+
96
+ elif stripped.startswith('tags:'):
97
+ sanitized_lines.append('tags:')
98
+ current_field = 'tags'
99
+
100
+ elif stripped.startswith('-') and current_field == 'tags':
101
+ # Process tag values
102
+ tag = stripped[1:].strip().strip('"\'')
103
+ if tag:
104
+ # Clean and format tag
105
+ tag = re.sub(r'[^\x00-\x7F]+', '', tag) # Remove non-ASCII
106
+ tag = re.sub(r'[^a-zA-Z0-9\s-]', '', tag) # Keep only alphanumeric and hyphen
107
+ tag = tag.strip().lower().replace(' ', '-')
108
+ if tag:
109
+ sanitized_lines.append(f" - {tag}")
110
+
111
+ elif current_field in ['title', 'description']:
112
+ # Handle multi-line title/description continuation
113
+ value = stripped.strip('"\'')
114
+ if value:
115
+ # Append to previous line (but within the quotes)
116
+ prev = sanitized_lines[-1]
117
+ # Remove the closing quote, append the value, and add the quote back
118
+ if prev.endswith('"'):
119
+ sanitized_lines[-1] = f'{prev[:-1]} {value}"'
120
+
121
+ # Ensure the YAML has all required fields
122
+ required_fields = {'title', 'description', 'tags'}
123
+ found_fields = {line.split(':')[0].strip() for line in sanitized_lines if ':' in line}
124
+
125
+ for field in required_fields - found_fields:
126
+ if field == 'tags':
127
+ sanitized_lines.extend(['tags:', ' - default'])
128
+ else:
129
+ sanitized_lines.append(f'{field}: "No {field} provided"')
130
+
131
+ return '\n'.join(sanitized_lines)
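A hedged sketch of sanitize_yaml_response on a typical messy LLM reply; PyYAML is assumed to be available for the final parse, and the reply text itself is invented:

import yaml  # PyYAML, assumed installed for this demo
from server.utils import generate_seed, sanitize_yaml_response

raw = '''```yaml
title: "title: "Squirrel Park Webcam""
description: Documentary footage, wide shot. Squirrels play in lush grass.
```'''

cleaned = sanitize_yaml_response(raw)
data = yaml.safe_load(cleaned)
print(data["title"])   # -> Squirrel Park Webcam (duplicated 'title:' removed)
print(data["tags"])    # -> ['default'] (missing field filled with a default)
print(generate_seed()) # -> random positive 32-bit integer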
server/video_utils.py ADDED
@@ -0,0 +1,174 @@
1
+ """
2
+ Video generation utilities for HuggingFace endpoints and Gradio spaces.
3
+ """
4
+ import asyncio
5
+ import time
6
+ import uuid
7
+ import logging
8
+ from typing import Dict
9
+ from aiohttp import ClientSession
10
+ from gradio_client import Client
11
+ from .models import UserRole, Endpoint
12
+ from .api_config import HF_TOKEN, GUIDANCE_SCALE
13
+ from .logging_utils import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ async def generate_video_content_with_inference_endpoints(
19
+ endpoint_manager, prompt: str, negative_prompt: str, width: int,
20
+ height: int, num_frames: int, num_inference_steps: int,
21
+ frame_rate: int, seed: int, options: dict, user_role: UserRole
22
+ ) -> str:
23
+ """
24
+ Internal method to generate video content with specific parameters.
25
+ Used by both regular video generation and thumbnail generation.
26
+ """
27
+ is_thumbnail = options.get('thumbnail', False)
28
+ request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
29
+ video_id = options.get('video_id', 'unknown')
30
+
31
+ # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
32
+
33
+ json_payload = {
34
+ "inputs": {
35
+ "prompt": prompt,
36
+ },
37
+ "parameters": {
38
+ # ------------------- settings for LTX-Video -----------------------
39
+ "negative_prompt": negative_prompt,
40
+ "width": width,
41
+ "height": height,
42
+ "num_frames": num_frames,
43
+ "num_inference_steps": num_inference_steps,
44
+ "guidance_scale": options.get('guidance_scale', GUIDANCE_SCALE),
45
+ "seed": seed,
46
+
47
+ # ------------------- settings for Varnish -----------------------
48
+ "double_num_frames": False, # <- False for real-time generation
49
+ "fps": frame_rate,
50
+ "super_resolution": False, # <- False for real-time generation
51
+ "grain_amount": 0, # No film grain (on low-res, low-quality generation the effects aren't worth it + it adds weight to the MP4 payload)
52
+ }
53
+ }
54
+
55
+ # Add thumbnail flag to help with metrics and debugging
56
+ if is_thumbnail:
57
+ json_payload["metadata"] = {
58
+ "is_thumbnail": True,
59
+ "thumbnail_version": "1.0",
60
+ "request_id": request_id
61
+ }
62
+
63
+ # logger.info(f"[{request_id}] Waiting for an available endpoint...")
64
+ async with endpoint_manager.get_endpoint() as endpoint:
65
+ # logger.info(f"[{request_id}] Using endpoint {endpoint.id} for generation")
66
+
67
+ try:
68
+ async with ClientSession() as session:
69
+ #logger.info(f"[{request_id}] Sending request to endpoint {endpoint.id}: {endpoint.url}")
70
+ start_time = time.time()
71
+
72
+ # Proceed with actual request
73
+ async with session.post(
74
+ endpoint.url,
75
+ headers={
76
+ "Accept": "application/json",
77
+ "Authorization": f"Bearer {HF_TOKEN}",
78
+ "Content-Type": "application/json",
79
+ "X-Request-ID": request_id # Add request ID to headers
80
+ },
81
+ json=json_payload,
82
+ timeout=12 # Extended timeout for thumbnails (was 8s)
83
+ ) as response:
84
+ request_duration = time.time() - start_time
85
+ #logger.info(f"[{request_id}] Received response from endpoint {endpoint.id} in {request_duration:.2f}s: HTTP {response.status}")
86
+
87
+ if response.status != 200:
88
+ error_text = await response.text()
89
+ logger.error(f"[{request_id}] Failed response: {error_text}")
90
+ # Mark endpoint as in error state
91
+ await endpoint_manager.mark_endpoint_error(endpoint)
92
+ if "paused" in error_text:
93
+ logger.error(f"[{request_id}] Endpoint is paused")
94
+ return ""
95
+ raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
96
+
97
+ result = await response.json()
98
+ #logger.info(f"[{request_id}] Successfully parsed JSON response")
99
+
100
+ if "error" in result:
101
+ error_msg = result['error']
102
+ logger.error(f"[{request_id}] Error in response: {error_msg}")
103
+ # Mark endpoint as in error state
104
+ await endpoint_manager.mark_endpoint_error(endpoint)
105
+ if "paused" in str(error_msg).lower():
106
+ logger.error(f"[{request_id}] Endpoint is paused")
107
+ return ""
108
+ raise Exception(f"Video generation failed: {error_msg}")
109
+
110
+ video_data_uri = result.get("video")
111
+ if not video_data_uri:
112
+ logger.error(f"[{request_id}] No video data in response")
113
+ # Mark endpoint as in error state
114
+ await endpoint_manager.mark_endpoint_error(endpoint)
115
+ raise Exception("No video data in response")
116
+
117
+ # Get data size
118
+ data_size = len(video_data_uri)
119
+ #logger.info(f"[{request_id}] Received video data: {data_size} chars")
120
+
121
+ # Reset error count on successful call
122
+ endpoint.error_count = 0
123
+ endpoint.error_until = 0
124
+
125
+ return video_data_uri
126
+
127
+ except asyncio.TimeoutError:
128
+ # Handle timeout specifically
129
+ logger.error(f"[{request_id}] Timeout occurred after {time.time() - start_time:.2f}s")
130
+ await endpoint_manager.mark_endpoint_error(endpoint, is_timeout=True)
131
+ return ""
132
+ except Exception as e:
133
+ # Handle all other exceptions
134
+ logger.error(f"[{request_id}] Exception during video generation: {str(e)}")
135
+ if not isinstance(e, asyncio.TimeoutError): # Already handled above
136
+ await endpoint_manager.mark_endpoint_error(endpoint)
137
+ return ""
138
+
139
+
140
+ async def generate_video_content_with_gradio(
141
+ endpoint_manager, prompt: str, negative_prompt: str, width: int,
142
+ height: int, num_frames: int, num_inference_steps: int,
143
+ frame_rate: int, seed: int, options: dict, user_role: UserRole
144
+ ) -> str:
145
+ """
146
+ Internal method to generate video content with specific parameters.
147
+ Used by both regular video generation and thumbnail generation.
148
+ This version uses our generic gradio space.
149
+ """
150
+ is_thumbnail = options.get('thumbnail', False)
151
+ request_id = options.get('request_id', str(uuid.uuid4())[:8]) # Get or generate request ID
152
+ video_id = options.get('video_id', 'unknown')
153
+
154
+ # logger.info(f"[{request_id}] Generating {'thumbnail' if is_thumbnail else 'video'} for video {video_id} with seed {seed}")
155
+
156
+ # Define the synchronous function
157
+ def _sync_gradio_call():
158
+ client = Client("jbilcke-hf/fast-rendering-node", hf_token=HF_TOKEN)
159
+
160
+ return client.predict(
161
+ prompt=prompt,
162
+ seed=seed,
163
+ fps=8, # frame_rate, # attention, right now tilslop asks for 25 FPS
164
+ width=640, # width, # attention, right now tikslop asks for 1152
165
+ height=320, # height, # attention, righ tnow tikslop asks for 640
166
+ duration=3, # num_frames // frame_rate
167
+ )
168
+
169
+ # Run in a thread using asyncio.to_thread (Python 3.9+)
170
+ video_data_uri = await asyncio.to_thread(_sync_gradio_call)
171
+
172
+ return video_data_uri
173
+
174
+
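A hedged call sketch for the Gradio path; it needs HF_TOKEN and network access to the jbilcke-hf/fast-rendering-node space, and the prompt/options values are placeholders (real calls come from api_core):

import asyncio
from server.endpoint_manager import EndpointManager
from server.video_utils import generate_video_content_with_gradio

async def main() -> None:
    data_uri = await generate_video_content_with_gradio(
        endpoint_manager=EndpointManager(),  # unused by the Gradio path, kept for API symmetry
        prompt="documentary footage, wide shot, squirrels playing in lush grass",
        negative_prompt="",
        width=640,
        height=320,
        num_frames=24,
        num_inference_steps=8,
        frame_rate=8,
        seed=42,
        options={"video_id": "demo", "request_id": "demo-req"},
        user_role="anon",
    )
    print(str(data_uri)[:80])

asyncio.run(main())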