import os
import base64
import io
import requests
from typing import Dict, Any, Optional, List
from PIL import Image
import numpy as np
class AIImageVideoPipeline:
"""
Comprehensive AI-powered Image-to-Video Generation Pipeline
## Workflow Stages
1. Initial Image Generation
2. Iterative Outpainting
3. LTX Video Transformation
## Technical Architecture
- Modular design with configurable components
- Support for multiple AI inference endpoints
    - Robust error handling
"""
def __init__(
self,
image_generation_endpoint: Optional[str] = None,
outpainting_endpoint: Optional[str] = None,
ltx_video_endpoint: Optional[str] = None,
api_token: Optional[str] = None
):
"""
Initialize the AI Image-to-Video pipeline.
Args:
image_generation_endpoint (str): Endpoint for initial image generation
outpainting_endpoint (str): Endpoint for image outpainting
ltx_video_endpoint (str): Endpoint for LTX video generation
api_token (str): Authentication token for API calls
"""
self.endpoints = {
'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
}
self.api_token = api_token or os.getenv('HF_API_TOKEN')
# Validate endpoint configuration
self._validate_endpoints()
def _validate_endpoints(self):
"""
Validate configured API endpoints.
Raises:
ValueError: If any required endpoint is missing
"""
missing_endpoints = [
key for key, value in self.endpoints.items()
if not value
]
if missing_endpoints:
raise ValueError(
f"Missing API endpoints: {', '.join(missing_endpoints)}. "
"Please configure via parameters or environment variables."
)
def encode_image(
self,
image: Image.Image,
format: str = 'JPEG'
) -> str:
"""
Encode PIL Image to base64 data URI.
Args:
image (Image.Image): Input image
format (str): Output image format
Returns:
str: Base64 encoded data URI
"""
        # JPEG cannot store an alpha channel; flatten such images to RGB first
        if format.upper() == 'JPEG' and image.mode in ('RGBA', 'LA', 'P'):
            image = image.convert('RGB')
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=format)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{format.lower()};base64,{base64_encoded}"
def generate_initial_image(
self,
prompt: str,
width: int = 768,
height: int = 480
) -> Image.Image:
"""
Generate initial image using text prompt.
Args:
prompt (str): Image generation prompt
width (int): Image width
height (int): Image height
Returns:
Image.Image: Generated image
"""
payload = {
"inputs": prompt,
"parameters": {
"width": width,
"height": height
}
}
response = self._make_api_call(
self.endpoints['image_gen'],
payload
)
return self._decode_image_response(response)
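    # Usage sketch (prompt and dimensions are illustrative): width and height are
    # forwarded verbatim in the "parameters" block of the request payload.
    #
    #   frame = pipeline.generate_initial_image("a foggy forest at dawn", width=1024, height=576)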
def iterative_outpainting(
self,
image: Image.Image,
prompt: str,
iterations: int = 3,
padding_size: int = 256
) -> Image.Image:
"""
Perform iterative outpainting to expand image.
Args:
image (Image.Image): Starting image
prompt (str): Outpainting generation prompt
iterations (int): Number of outpainting steps
padding_size (int): Padding size for each iteration
Returns:
Image.Image: Final outpainted image
"""
current_image = image.copy()
for _ in range(iterations):
# Create padded image
padded_size = (
current_image.width + 2 * padding_size,
current_image.height + 2 * padding_size
)
padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
padded_image.paste(
current_image,
(padding_size, padding_size)
)
# Create mask for padding regions
mask = self._create_padding_mask(padded_image, padding_size)
# Outpainting request
            # Encode as PNG so the transparent (RGBA) padding survives; the
            # default JPEG format cannot represent the alpha channel
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }
response = self._make_api_call(
self.endpoints['outpainting'],
payload
)
current_image = self._decode_image_response(response)
return current_image
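    # Size note, assuming the outpainting endpoint returns images at the padded
    # resolution: each iteration grows both dimensions by 2 * padding_size, so the
    # default 3 iterations at padding 256 take a 768x480 image to 2304x2016.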
def _create_padding_mask(
self,
image: Image.Image,
padding_size: int
) -> Image.Image:
"""
Generate a mask indicating padding regions.
Args:
image (Image.Image): Source image
padding_size (int): Size of padding
Returns:
Image.Image: Mask image
"""
mask = Image.new('L', image.size, 0)
mask_array = np.array(mask)
# Mark padding regions white (255)
mask_array[:padding_size, :] = 255 # Top
mask_array[-padding_size:, :] = 255 # Bottom
mask_array[:, :padding_size] = 255 # Left
mask_array[:, -padding_size:] = 255 # Right
return Image.fromarray(mask_array)
def generate_ltx_video(
self,
image: Image.Image,
prompt: str = "",
video_config: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Generate video using LTX video generation API.
Args:
image (Image.Image): Input image
prompt (str, optional): Optional video generation prompt
video_config (Dict, optional): Custom video generation parameters
Returns:
Dict: API response containing video generation details
"""
default_config = {
"width": 768,
"height": 480,
"num_frames": 129, # 8*16 + 1
"num_inference_steps": 50,
"guidance_scale": 4.0,
"double_num_frames": True,
"fps": 60,
"super_resolution": True,
"grain_amount": 12
}
# Merge default and custom configurations
config = {**default_config, **(video_config or {})}
payload = {
"inputs": {
"image": self.encode_image(image),
"prompt": prompt
},
"parameters": config
}
return self._make_api_call(
self.endpoints['ltx_video'],
payload
)
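    # Usage sketch (values are illustrative): keys in video_config override the
    # matching entries of default_config via the dict merge above.
    #
    #   response = pipeline.generate_ltx_video(
    #       image,
    #       prompt="slow pan across the scene",
    #       video_config={"num_frames": 65, "fps": 30},
    #   )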
def _make_api_call(
self,
endpoint: str,
payload: Dict[str, Any]
) -> Dict[str, Any]:
"""
Execute API request with error handling.
Args:
endpoint (str): API endpoint URL
payload (Dict): Request payload
Returns:
Dict: API response
"""
headers = {
"Authorization": f"Bearer {self.api_token}",
"Content-Type": "application/json",
"Accept": "application/json"
}
        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=600  # generation requests can take several minutes
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise RuntimeError(f"API call failed: {e}") from e
def _decode_image_response(
self,
response: Dict[str, Any]
) -> Image.Image:
"""
Decode image from API response.
Args:
response (Dict): API response
Returns:
Image.Image: Decoded image
"""
if 'image' not in response:
raise ValueError("No image found in API response")
        image_data = response['image']
        # Strip the data-URI prefix (e.g. "data:image/png;base64,") if present
        if ',' in image_data:
            image_data = image_data.split(',', 1)[1]
        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))
def full_pipeline(
self,
initial_prompt: str,
outpainting_prompt: Optional[str] = None,
video_prompt: Optional[str] = None
) -> Dict[str, Any]:
"""
Execute complete image-to-video pipeline.
Args:
initial_prompt (str): Prompt for initial image generation
outpainting_prompt (str, optional): Prompt for image expansion
video_prompt (str, optional): Prompt for video generation
Returns:
Dict: Pipeline execution results
"""
# 1. Generate Initial Image
initial_image = self.generate_initial_image(initial_prompt)
# 2. Outpainting (optional)
if outpainting_prompt:
expanded_image = self.iterative_outpainting(
initial_image,
outpainting_prompt
)
else:
expanded_image = initial_image
# 3. Video Generation
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""  # keep the payload prompt a string when none is given
        )
return {
"initial_image": initial_image,
"expanded_image": expanded_image,
"video_response": video_response
}
def main():
"""
Demonstration of full AI Image-to-Video pipeline.
"""
pipeline = AIImageVideoPipeline(
image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
api_token="YOUR_HF_API_TOKEN"
)
try:
result = pipeline.full_pipeline(
initial_prompt="Serene landscape with mountains and a lake",
outpainting_prompt="Expand the scene with more natural elements",
video_prompt="Smooth camera pan across the landscape"
)
# Save images and process video
result['initial_image'].save("initial_image.png")
result['expanded_image'].save("expanded_image.png")
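        # The structure of result['video_response'] depends on the deployed LTX
        # endpoint; a base64-encoded payload under a key such as 'video' is one
        # common convention (assumed here, adjust to the actual response):
        # video_b64 = result['video_response'].get('video')
        # if video_b64:
        #     with open("output_video.mp4", "wb") as f:
        #         f.write(base64.b64decode(video_b64))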
print("Pipeline execution completed successfully!")
except Exception as e:
print(f"Pipeline execution failed: {e}")
if __name__ == "__main__":
main()
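# Configuration sketch: instead of hard-coding endpoints and the token in main(),
# the constructor falls back to these environment variables (see __init__):
#
#   export IMAGE_GEN_ENDPOINT="https://..."
#   export OUTPAINTING_ENDPOINT="https://..."
#   export LTX_VIDEO_ENDPOINT="https://..."
#   export HF_API_TOKEN="hf_..."
#
#   pipeline = AIImageVideoPipeline()  # picks everything up from the environment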