|
import os |
|
import base64 |
|
import io |
|
import requests |
|
from typing import Dict, Any, Optional, List |
|
from PIL import Image |
|
import numpy as np |
|
|
|
class AIImageVideoPipeline:
    """
    Comprehensive AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - Transport failures are re-raised as ``RuntimeError``; responses
      without an image raise ``ValueError``
    """

    # Modes Pillow's JPEG writer accepts as-is; anything else (RGBA, P, LA,
    # ...) has to be converted before saving or ``Image.save`` raises.
    _JPEG_MODES = ("1", "L", "RGB", "CMYK", "YCbCr")

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None,
        request_timeout: float = 300.0
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Each endpoint falls back to an environment variable when the
        argument is empty: ``IMAGE_GEN_ENDPOINT``, ``OUTPAINTING_ENDPOINT``,
        ``LTX_VIDEO_ENDPOINT``. The token falls back to ``HF_API_TOKEN``.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls
            request_timeout (float): Seconds to wait for each HTTP request
                before giving up. The default is deliberately generous;
                tune it to the latency of the endpoints actually used.

        Raises:
            ValueError: If any endpoint is missing after the fallback.
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')
        self.request_timeout = request_timeout

        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items()
            if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

    def encode_image(
        self,
        image: Image.Image,
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        Images whose mode the JPEG writer cannot store (for example RGBA)
        are converted to RGB first, so the alpha channel is dropped for
        JPEG output. The alias ``JPG`` is accepted and treated as ``JPEG``.

        Args:
            image (Image.Image): Input image
            format (str): Output image format

        Returns:
            str: Base64 encoded data URI
        """
        fmt = format.upper()
        if fmt == 'JPG':
            # Pillow only registers the writer under the name "JPEG".
            fmt = 'JPEG'

        if fmt == 'JPEG' and image.mode not in self._JPEG_MODES:
            image = image.convert('RGB')

        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=fmt)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{fmt.lower()};base64,{base64_encoded}"

    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> Image.Image:
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image

        Raises:
            RuntimeError: If the API call fails.
            ValueError: If the response carries no image.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(
            self.endpoints['image_gen'],
            payload
        )

        return self._decode_image_response(response)

    def iterative_outpainting(
        self,
        image: Image.Image,
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> Image.Image:
        """
        Perform iterative outpainting to expand image.

        Every iteration pastes the current image into the centre of a
        transparent canvas that is ``padding_size`` pixels larger on each
        side, builds a mask marking that border, and sends both to the
        outpainting endpoint. The image returned by the endpoint becomes
        the input of the next iteration.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image (a copy of ``image`` when
                ``iterations`` is zero or negative)

        Raises:
            ValueError: If ``padding_size`` is negative or a response
                carries no image.
            RuntimeError: If an API call fails.
        """
        current_image = image.copy()

        for _ in range(iterations):
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(
                current_image,
                (padding_size, padding_size)
            )

            mask = self._create_padding_mask(padded_image, padding_size)

            # PNG keeps the canvas' alpha channel and keeps the mask free of
            # lossy-compression artefacts. The JPEG default cannot store an
            # RGBA canvas at all.
            # NOTE(review): assumes the endpoint accepts PNG data URIs - confirm.
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }

            response = self._make_api_call(
                self.endpoints['outpainting'],
                payload
            )

            current_image = self._decode_image_response(response)

        return current_image

    @staticmethod
    def _padding_mask_array(
        width: int,
        height: int,
        padding_size: int
    ) -> np.ndarray:
        """
        Build the mask as a ``(height, width)`` uint8 array.

        The border of thickness ``padding_size`` is 255 and the interior
        is 0.

        Raises:
            ValueError: If ``padding_size`` is negative.
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        # Start fully masked and clear the interior. Clearing an interior
        # slice stays correct for padding_size == 0 (nothing masked) and for
        # padding that swallows the whole image (everything masked), whereas
        # a "[-padding_size:]" border slice selects the entire axis at 0.
        mask_array = np.full((height, width), 255, dtype=np.uint8)
        mask_array[
            padding_size:height - padding_size,
            padding_size:width - padding_size
        ] = 0
        return mask_array

    def _create_padding_mask(
        self,
        image: Image.Image,
        padding_size: int
    ) -> Image.Image:
        """
        Generate a mask indicating padding regions.

        Args:
            image (Image.Image): Source image; only its size is used
            padding_size (int): Size of padding

        Returns:
            Image.Image: Single-channel mask image, 255 on the border of
                thickness ``padding_size`` and 0 elsewhere

        Raises:
            ValueError: If ``padding_size`` is negative.
        """
        width, height = image.size
        return Image.fromarray(
            self._padding_mask_array(width, height, padding_size)
        )

    def generate_ltx_video(
        self,
        image: Image.Image,
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt;
                ``None`` is treated as an empty prompt
            video_config (Dict, optional): Custom video generation parameters;
                entries override the defaults key by key

        Returns:
            Dict: API response containing video generation details

        Raises:
            RuntimeError: If the API call fails.
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Caller-supplied values win; the caller's dict is never mutated.
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                "prompt": prompt or ""
            },
            "parameters": config
        }

        return self._make_api_call(
            self.endpoints['ltx_video'],
            payload
        )

    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response

        Raises:
            RuntimeError: If the request fails, times out, returns an HTTP
                error status, or the body is not valid JSON. The original
                exception is attached as ``__cause__``.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        # Without a token, send no Authorization header at all instead of
        # the literal string "Bearer None".
        if self.api_token:
            headers["Authorization"] = f"Bearer {self.api_token}"

        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()

        # ValueError covers JSON decoding errors on requests versions where
        # they are not RequestException subclasses.
        except (requests.RequestException, ValueError) as e:
            raise RuntimeError(f"API call failed: {e}") from e

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> Image.Image:
        """
        Decode image from API response.

        The ``image`` field may be a data URI (``data:...;base64,<payload>``)
        or a bare base64 string.

        Args:
            response (Dict): API response

        Returns:
            Image.Image: Decoded image

        Raises:
            ValueError: If the response is not a mapping with an ``image`` key.
        """
        if not isinstance(response, dict) or 'image' not in response:
            raise ValueError("No image found in API response")

        # Base64 never contains a comma, so anything up to the first comma
        # can only be the data-URI header.
        _, separator, tail = response['image'].partition(",")
        image_data = tail if separator else response['image']
        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion;
                outpainting is skipped when empty
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results with the keys
                ``initial_image``, ``expanded_image`` and ``video_response``

        Raises:
            RuntimeError: If any API call fails.
            ValueError: If an image response carries no image.
        """
        initial_image = self.generate_initial_image(initial_prompt)

        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # Normalise a missing prompt to "" so the payload never carries null.
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }
|
|
|
def main():
    """
    Demonstration of full AI Image-to-Video pipeline.

    Builds a pipeline from placeholder endpoint/token strings (replace them
    with real values before running), runs all three stages with sample
    prompts, and writes the two still images to ``initial_image.png`` and
    ``expanded_image.png`` in the current working directory. The video
    response is not written anywhere.

    Any failure while running the pipeline or saving the images is
    reported on stdout instead of being propagated.
    """
    pipeline = AIImageVideoPipeline(
        image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
        outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
        ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
        api_token="YOUR_HF_API_TOKEN"
    )

    try:
        result = pipeline.full_pipeline(
            initial_prompt="Serene landscape with mountains and a lake",
            outpainting_prompt="Expand the scene with more natural elements",
            video_prompt="Smooth camera pan across the landscape"
        )

        result['initial_image'].save("initial_image.png")
        result['expanded_image'].save("expanded_image.png")

        print("Pipeline execution completed successfully!")

    # Top-level demo boundary: report every failure rather than crash.
    except Exception as e:
        print(f"Pipeline execution failed: {e}")
|
# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()