Update poweredimg2vid.py
poweredimg2vid.py
CHANGED
@@ -0,0 +1,365 @@
import os
import base64
import io
import requests
from typing import Dict, Any, Optional, List
from PIL import Image
import numpy as np

class AIImageVideoPipeline:
    """
    Comprehensive AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - Robust error handling and logging
    """

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')

        # Validate endpoint configuration
        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items()
            if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

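    # Example (sketch): configuration can also come entirely from the environment,
    # using the variable names read in __init__ above, e.g.:
    #   export IMAGE_GEN_ENDPOINT=https://<your-image-generation-endpoint>
    #   export OUTPAINTING_ENDPOINT=https://<your-outpainting-endpoint>
    #   export LTX_VIDEO_ENDPOINT=https://<your-ltx-video-endpoint>
    #   export HF_API_TOKEN=<your-token>
    # after which `AIImageVideoPipeline()` needs no constructor arguments.
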
    def encode_image(
        self,
        image: Image.Image,
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        Args:
            image (Image.Image): Input image
            format (str): Output image format

        Returns:
            str: Base64 encoded data URI
        """
        # JPEG cannot store an alpha channel; drop it instead of letting save() fail
        if format.upper() == 'JPEG' and image.mode not in ('RGB', 'L'):
            image = image.convert('RGB')
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=format)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{format.lower()};base64,{base64_encoded}"

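    # Example (sketch): encode_image(img, format='PNG') returns a string of the form
    # "data:image/png;base64,iVBORw0KGgo..." suitable for embedding in a JSON payload.
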
    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> Image.Image:
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(
            self.endpoints['image_gen'],
            payload
        )

        return self._decode_image_response(response)

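    # Example (sketch), assuming the configured endpoint accepts the
    # {"inputs": ..., "parameters": {...}} payload built above:
    #   frame = pipeline.generate_initial_image("A misty forest at dawn", width=768, height=480)
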
    def iterative_outpainting(
        self,
        image: Image.Image,
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> Image.Image:
        """
        Perform iterative outpainting to expand image.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image
        """
        current_image = image.copy()

        for _ in range(iterations):
            # Create padded image
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(
                current_image,
                (padding_size, padding_size)
            )

            # Create mask for padding regions
            mask = self._create_padding_mask(padded_image, padding_size)

            # Outpainting request (PNG preserves the transparent padding; JPEG cannot encode RGBA)
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }

            response = self._make_api_call(
                self.endpoints['outpainting'],
                payload
            )

            current_image = self._decode_image_response(response)

        return current_image

    def _create_padding_mask(
        self,
        image: Image.Image,
        padding_size: int
    ) -> Image.Image:
        """
        Generate a mask indicating padding regions.

        Args:
            image (Image.Image): Source image
            padding_size (int): Size of padding

        Returns:
            Image.Image: Mask image
        """
        mask = Image.new('L', image.size, 0)
        mask_array = np.array(mask)

        # Mark padding regions white (255)
        mask_array[:padding_size, :] = 255  # Top
        mask_array[-padding_size:, :] = 255  # Bottom
        mask_array[:, :padding_size] = 255  # Left
        mask_array[:, -padding_size:] = 255  # Right

        return Image.fromarray(mask_array)

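    # Worked example (sketch): for a 1280x992 padded canvas built from a 768x480
    # image with padding_size=256, the mask is white over the 256-pixel border and
    # black over the central 768x480 region, so the inpainting model only fills
    # the newly added border.
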
    def generate_ltx_video(
        self,
        image: Image.Image,
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt
            video_config (Dict, optional): Custom video generation parameters

        Returns:
            Dict: API response containing video generation details
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,  # 8*16 + 1
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Merge default and custom configurations
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                "prompt": prompt
            },
            "parameters": config
        }

        return self._make_api_call(
            self.endpoints['ltx_video'],
            payload
        )

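    # Example (sketch): any of the defaults above can be overridden per call,
    # e.g. a shorter, lower-frame-rate preview clip:
    #   pipeline.generate_ltx_video(image, "slow camera pan",
    #                               video_config={"num_frames": 65, "fps": 24})
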
    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response
        """
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }

        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=600  # generation calls can be slow; avoid hanging indefinitely
            )
            response.raise_for_status()
            return response.json()

        except requests.RequestException as e:
            raise RuntimeError(f"API call failed: {e}") from e

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> Image.Image:
        """
        Decode image from API response.

        Args:
            response (Dict): API response

        Returns:
            Image.Image: Decoded image
        """
        if 'image' not in response:
            raise ValueError("No image found in API response")

        # Accept either a full data URI ("data:image/...;base64,<data>") or bare base64
        image_data = response['image'].split(",", 1)[-1]
        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results
        """
        # 1. Generate Initial Image
        initial_image = self.generate_initial_image(initial_prompt)

        # 2. Outpainting (optional)
        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # 3. Video Generation
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""  # generate_ltx_video expects a string prompt
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }

def main():
    """
    Demonstration of full AI Image-to-Video pipeline.
    """
    pipeline = AIImageVideoPipeline(
        image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
        outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
        ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
        api_token="YOUR_HF_API_TOKEN"
    )

    try:
        result = pipeline.full_pipeline(
            initial_prompt="Serene landscape with mountains and a lake",
            outpainting_prompt="Expand the scene with more natural elements",
            video_prompt="Smooth camera pan across the landscape"
        )

        # Save images and process video
        result['initial_image'].save("initial_image.png")
        result['expanded_image'].save("expanded_image.png")

        print("Pipeline execution completed successfully!")

    except Exception as e:
        print(f"Pipeline execution failed: {e}")

if __name__ == "__main__":
    main()