K00B404 committed on
Commit
cf3c22b
·
verified ·
1 Parent(s): 0cff3e4

Update poweredimg2vid.py

Browse files
Files changed (1) hide show
  1. poweredimg2vid.py +365 -0
poweredimg2vid.py CHANGED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import io
4
+ import requests
5
+ from typing import Dict, Any, Optional, List
6
+ from PIL import Image
7
+ import numpy as np
8
+
9
class AIImageVideoPipeline:
    """
    Comprehensive AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - Transport/HTTP failures surface as RuntimeError, malformed image
      responses as ValueError
    """

    # Modes that carry an alpha channel or a palette; Pillow refuses to write
    # these as JPEG, so encode_image() flattens them to RGB first.
    _JPEG_UNSUPPORTED_MODES = ('RGBA', 'LA', 'P', 'PA')

    # Class-level fallback so instances created without __init__ still have a
    # finite request timeout (seconds).
    request_timeout: Optional[float] = 300.0

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None,
        request_timeout: Optional[float] = 300.0
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Each endpoint falls back to an environment variable when the argument
        is empty: IMAGE_GEN_ENDPOINT, OUTPAINTING_ENDPOINT, LTX_VIDEO_ENDPOINT.
        The token falls back to HF_API_TOKEN.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls
            request_timeout (float, optional): Seconds to wait for each HTTP
                request; None disables the timeout

        Raises:
            ValueError: If any endpoint is still unset after the fallbacks
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')
        self.request_timeout = request_timeout

        # Validate endpoint configuration
        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items()
            if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

    def encode_image(
        self,
        image: "Image.Image",
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        Images whose mode carries alpha or a palette are converted to RGB
        before being written as JPEG, because JPEG cannot store them.
        The caller's image object is not modified.

        Args:
            image (Image.Image): Input image
            format (str): Output image format

        Returns:
            str: Base64 encoded data URI
        """
        if format.upper() == 'JPEG' and image.mode in self._JPEG_UNSUPPORTED_MODES:
            # convert() returns a new image, so the argument stays untouched.
            image = image.convert('RGB')

        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=format)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{format.lower()};base64,{base64_encoded}"

    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> "Image.Image":
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image

        Raises:
            RuntimeError: If the API call fails
            ValueError: If the response carries no decodable image
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(
            self.endpoints['image_gen'],
            payload
        )

        return self._decode_image_response(response)

    def iterative_outpainting(
        self,
        image: "Image.Image",
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> "Image.Image":
        """
        Perform iterative outpainting to expand image.

        Each iteration centers the current image on a transparent canvas that
        is `padding_size` pixels larger on every side, sends the canvas plus a
        mask of the new border to the outpainting endpoint, and continues from
        the returned image.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image

        Raises:
            ValueError: If padding_size is negative or a response is malformed
            RuntimeError: If an API call fails
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        current_image = image.copy()

        for _ in range(iterations):
            # Create padded image
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(
                current_image,
                (padding_size, padding_size)
            )

            # Create mask for padding regions
            mask = self._create_padding_mask(padded_image, padding_size)

            # Outpainting request. PNG keeps the canvas alpha channel and the
            # hard mask edges; the default JPEG cannot store RGBA and is lossy.
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }

            response = self._make_api_call(
                self.endpoints['outpainting'],
                payload
            )

            current_image = self._decode_image_response(response)

        return current_image

    @staticmethod
    def _padding_mask_array(
        width: int,
        height: int,
        padding_size: int
    ) -> "np.ndarray":
        """
        Build the border mask as a (height, width) uint8 array.

        Args:
            width (int): Mask width in pixels
            height (int): Mask height in pixels
            padding_size (int): Border thickness in pixels

        Returns:
            np.ndarray: 255 inside the border, 0 in the interior

        Raises:
            ValueError: If padding_size is negative
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        # Start fully white and carve out the interior. Slicing the interior
        # (rather than the four edges) keeps padding_size == 0 correct: an
        # edge slice such as [-0:] would select the whole array.
        mask_array = np.full((height, width), 255, dtype=np.uint8)
        mask_array[
            padding_size:height - padding_size,
            padding_size:width - padding_size
        ] = 0
        return mask_array

    def _create_padding_mask(
        self,
        image: "Image.Image",
        padding_size: int
    ) -> "Image.Image":
        """
        Generate a mask indicating padding regions.

        Args:
            image (Image.Image): Source image (only its size is used)
            padding_size (int): Size of padding

        Returns:
            Image.Image: Mask image, white (255) on the padding border and
                black (0) elsewhere
        """
        width, height = image.size
        return Image.fromarray(
            self._padding_mask_array(width, height, padding_size)
        )

    def generate_ltx_video(
        self,
        image: "Image.Image",
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt
            video_config (Dict, optional): Custom video generation parameters;
                keys given here override the defaults below

        Returns:
            Dict: API response containing video generation details

        Raises:
            RuntimeError: If the API call fails
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,  # 8*16 + 1
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Merge default and custom configurations
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                "prompt": prompt
            },
            "parameters": config
        }

        return self._make_api_call(
            self.endpoints['ltx_video'],
            payload
        )

    def _build_headers(self) -> Dict[str, str]:
        """
        Build the HTTP headers for an API request.

        Returns:
            Dict: JSON content headers, plus a bearer Authorization header
                only when a token is configured
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        if self.api_token:
            # Skip the header entirely rather than sending "Bearer None".
            headers["Authorization"] = f"Bearer {self.api_token}"
        return headers

    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response

        Raises:
            RuntimeError: If the request fails, returns an HTTP error status,
                or the body is not valid JSON
        """
        try:
            response = requests.post(
                endpoint,
                headers=self._build_headers(),
                json=payload,
                # Without a timeout a stalled endpoint blocks forever.
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()

        except requests.RequestException as e:
            raise RuntimeError(f"API call failed: {e}") from e
        except ValueError as e:
            # Covers JSON decode errors that are not RequestException subclasses.
            raise RuntimeError(f"API call failed: invalid JSON response: {e}") from e

    @staticmethod
    def _strip_data_uri(encoded: str) -> str:
        """
        Return the base64 payload of a data URI.

        Args:
            encoded (str): Either "<header>,<base64>" or bare base64 text

        Returns:
            str: The text after the first comma, or the input unchanged when
                it contains no comma
        """
        _, separator, payload = encoded.partition(",")
        return payload if separator else encoded

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> "Image.Image":
        """
        Decode image from API response.

        Args:
            response (Dict): API response; expected to hold a base64 string
                (optionally as a data URI) under the 'image' key

        Returns:
            Image.Image: Decoded image

        Raises:
            ValueError: If the response has no 'image' entry, the entry is
                not a string, or it is not valid base64
        """
        if not isinstance(response, dict) or 'image' not in response:
            raise ValueError("No image found in API response")

        encoded = response['image']
        if not isinstance(encoded, str):
            raise ValueError("Image in API response is not a base64 string")

        try:
            image_bytes = base64.b64decode(self._strip_data_uri(encoded))
        except ValueError as e:  # binascii.Error is a ValueError subclass
            raise ValueError("Image in API response is not valid base64") from e

        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion;
                the outpainting stage is skipped when this is empty
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results with keys 'initial_image',
                'expanded_image' and 'video_response'
        """
        # 1. Generate Initial Image
        initial_image = self.generate_initial_image(initial_prompt)

        # 2. Outpainting (optional)
        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # 3. Video Generation
        # generate_ltx_video expects a string prompt, so map None to "".
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }
336
+
337
def main():
    """
    Run the AI Image-to-Video pipeline once as a demonstration.

    Builds a pipeline from placeholder endpoint/token values, runs all
    stages with sample prompts, and writes the two resulting images to the
    current directory. Any failure during the run is reported on stdout
    instead of being raised.
    """
    pipeline_settings = {
        "image_generation_endpoint": "YOUR_IMAGE_GEN_ENDPOINT",
        "outpainting_endpoint": "YOUR_OUTPAINTING_ENDPOINT",
        "ltx_video_endpoint": "YOUR_LTX_VIDEO_ENDPOINT",
        "api_token": "YOUR_HF_API_TOKEN",
    }
    pipeline = AIImageVideoPipeline(**pipeline_settings)

    demo_prompts = {
        "initial_prompt": "Serene landscape with mountains and a lake",
        "outpainting_prompt": "Expand the scene with more natural elements",
        "video_prompt": "Smooth camera pan across the landscape",
    }

    # Result key -> file the image stored under that key is written to.
    image_outputs = (
        ("initial_image", "initial_image.png"),
        ("expanded_image", "expanded_image.png"),
    )

    try:
        result = pipeline.full_pipeline(**demo_prompts)

        # Save images; the video response is left in `result` untouched.
        for result_key, file_name in image_outputs:
            result[result_key].save(file_name)

        print("Pipeline execution completed successfully!")

    except Exception as e:
        print(f"Pipeline execution failed: {e}")


if __name__ == "__main__":
    main()