import base64
import io
import os
from typing import Any, Dict, List, Optional

import numpy as np
import requests
from PIL import Image


class AIImageVideoPipeline:
    """
    Comprehensive AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - API failures are surfaced as RuntimeError
    """

    # Image modes Pillow's JPEG writer accepts as-is; anything else (RGBA, P,
    # LA, ...) is converted to RGB before encoding.
    _JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None,
        request_timeout: Optional[float] = 600.0,
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Each endpoint falls back to an environment variable when the
        corresponding argument is empty: IMAGE_GEN_ENDPOINT,
        OUTPAINTING_ENDPOINT, LTX_VIDEO_ENDPOINT. The token falls back to
        HF_API_TOKEN.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls
            request_timeout (float, optional): Seconds to wait for each API
                request before giving up. None waits indefinitely.

        Raises:
            ValueError: If any endpoint is missing after the fallback.
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT'),
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')
        self.request_timeout = request_timeout

        # Validate endpoint configuration
        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items() if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

    def encode_image(
        self,
        image: Image.Image,
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        JPEG cannot store an alpha channel or a palette, so images in such
        modes (e.g. the RGBA canvas built during outpainting) are converted
        to RGB first instead of failing inside ``Image.save``.

        Args:
            image (Image.Image): Input image
            format (str): Output image format

        Returns:
            str: Base64 encoded data URI
        """
        if format.upper() == 'JPEG' and image.mode not in self._JPEG_WRITABLE_MODES:
            image = image.convert('RGB')

        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=format)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{format.lower()};base64,{base64_encoded}"

    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> Image.Image:
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(self.endpoints['image_gen'], payload)
        return self._decode_image_response(response)

    def iterative_outpainting(
        self,
        image: Image.Image,
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> Image.Image:
        """
        Perform iterative outpainting to expand image.

        Each iteration pastes the current image onto a transparent canvas
        that is ``padding_size`` pixels larger on every side, sends the
        canvas plus a mask of the new border to the outpainting endpoint,
        and continues from the image the endpoint returns.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image
        """
        current_image = image.copy()

        for _ in range(iterations):
            # Create padded image
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(current_image, (padding_size, padding_size))

            # Create mask for padding regions
            mask = self._create_padding_mask(padded_image, padding_size)

            # Outpainting request
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image),
                "mask_image": self.encode_image(mask)
            }

            response = self._make_api_call(self.endpoints['outpainting'], payload)
            current_image = self._decode_image_response(response)

        return current_image

    def _create_padding_mask(
        self,
        image: Image.Image,
        padding_size: int
    ) -> Image.Image:
        """
        Generate a mask indicating padding regions.

        The mask is white (255) on a border ``padding_size`` pixels wide and
        black (0) inside it. A padding of zero yields an all-black mask, and
        a padding that covers the whole image yields an all-white one.

        Args:
            image (Image.Image): Source image
            padding_size (int): Size of padding

        Returns:
            Image.Image: Mask image (mode 'L', same size as ``image``)
        """
        width, height = image.size
        border = max(padding_size, 0)

        # Start fully "padding" and clear the interior rectangle. Slicing the
        # interior (rather than the four borders) avoids the ``[-0:]`` pitfall
        # where a zero padding would select the entire array.
        mask_array = np.full((height, width), 255, dtype=np.uint8)
        mask_array[border:height - border, border:width - border] = 0

        return Image.fromarray(mask_array)

    def generate_ltx_video(
        self,
        image: Image.Image,
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt
            video_config (Dict, optional): Custom video generation parameters;
                keys given here override the built-in defaults

        Returns:
            Dict: API response containing video generation details
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,  # 8*16 + 1
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Merge default and custom configurations
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                "prompt": prompt
            },
            "parameters": config
        }

        return self._make_api_call(self.endpoints['ltx_video'], payload)

    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response

        Raises:
            RuntimeError: If the request fails, times out, returns an error
                status, or the body is not valid JSON. The underlying
                exception is attached as ``__cause__``.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        # Only authenticate when a token is configured; otherwise the header
        # would literally read "Bearer None".
        if self.api_token:
            headers["Authorization"] = f"Bearer {self.api_token}"

        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                # Without a timeout a stalled endpoint blocks forever.
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors on requests versions
            # whose JSONDecodeError is not a RequestException.
            raise RuntimeError(f"API call failed: {e}") from e

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> Image.Image:
        """
        Decode image from API response.

        Accepts either a data URI (``data:image/...;base64,<data>``) or a
        bare base64 string under the ``image`` key.

        Args:
            response (Dict): API response

        Returns:
            Image.Image: Decoded image

        Raises:
            ValueError: If the response carries no usable ``image`` entry.
        """
        if not isinstance(response, dict) or 'image' not in response:
            raise ValueError("No image found in API response")

        encoded = response['image']
        if not isinstance(encoded, str):
            raise ValueError("No image found in API response")

        # Strip the "data:...;base64," header when present; a payload without
        # a comma is treated as raw base64.
        _, separator, remainder = encoded.partition(",")
        image_data = remainder if separator else encoded

        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion;
                outpainting is skipped when empty
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results with keys ``initial_image``,
            ``expanded_image`` and ``video_response``
        """
        # 1. Generate Initial Image
        initial_image = self.generate_initial_image(initial_prompt)

        # 2. Outpainting (optional)
        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # 3. Video Generation
        # Normalise a missing prompt to "" so the payload never carries null.
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }


def main():
    """
    Demonstration of full AI Image-to-Video pipeline.
    """
    pipeline = AIImageVideoPipeline(
        image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
        outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
        ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
        api_token="YOUR_HF_API_TOKEN"
    )

    try:
        result = pipeline.full_pipeline(
            initial_prompt="Serene landscape with mountains and a lake",
            outpainting_prompt="Expand the scene with more natural elements",
            video_prompt="Smooth camera pan across the landscape"
        )

        # Save images and process video
        result['initial_image'].save("initial_image.png")
        result['expanded_image'].save("expanded_image.png")

        print("Pipeline execution completed successfully!")

    except Exception as e:
        # Top-level demo boundary: report the failure instead of a traceback.
        print(f"Pipeline execution failed: {e}")


if __name__ == "__main__":
    main()