gokaygokay committed
Commit b1c0860 · 1 Parent(s): eeb2755
Files changed (2):
  1. app.py +68 -41
  2. llm_inference_video.py +66 -22
app.py CHANGED
@@ -1,23 +1,73 @@
 import torch
 import gradio as gr
-from vlm_captions import VLMCaptioning
-
-# Initialize the VLMCaptioning model once at startup
-print("Initializing Video Prompt Generator...")
-vlm_captioner = VLMCaptioning()
-print("Video Prompt Generator initialized successfully!")
-
-# Import VideoLLMInferenceNode after VLMCaptioning initialization
-from llm_inference_video import VideoLLMInferenceNode
+import spaces
 
+# Create Gradio UI without loading models first
 title = """<h1 align="center">AI Video Prompt Generator</h1>
 <p align="center">Generate creative video prompts with technical specifications</p>
 <p align="center">You can use prompts with Kling, MiniMax, Hunyuan, Haiper, CogVideoX, Luma, LTX, Runway, PixVerse. </p>"""
 
-def create_video_interface():
-    # Pass the already initialized vlm_captioner to avoid serialization issues
-    llm_node = VideoLLMInferenceNode(vlm_captioner)
+# Import these at global scope but don't instantiate yet
+from vlm_captions import VLMCaptioning
+from llm_inference_video import VideoLLMInferenceNode
 
+# Global singleton instances - we'll initialize them only when needed
+vlm_captioner = None
+llm_node = None
+
+# Initialize only once on first use
+def get_vlm_captioner():
+    global vlm_captioner
+    if vlm_captioner is None:
+        print("Initializing Video Prompt Generator...")
+        vlm_captioner = VLMCaptioning()
+        print("Video Prompt Generator initialized successfully!")
+    return vlm_captioner
+
+def get_llm_node():
+    global llm_node
+    if llm_node is None:
+        llm_node = VideoLLMInferenceNode()
+    return llm_node
+
+# Wrapper functions that avoid passing the model between processes
+@spaces.GPU()
+def describe_image_wrapper(image, question="Describe this image in detail."):
+    """GPU-decorated function for image description"""
+    if image is None:
+        return "Please upload an image."
+
+    if not question or question.strip() == "":
+        question = "Describe this image in detail."
+
+    # Get the captioner inside this GPU-decorated function
+    vlm = get_vlm_captioner()
+    return vlm.describe_image(image=image, question=question)
+
+@spaces.GPU()
+def describe_video_wrapper(video, frame_interval=30):
+    """GPU-decorated function for video description"""
+    if video is None:
+        return "Please upload a video."
+
+    # Get the captioner inside this GPU-decorated function
+    vlm = get_vlm_captioner()
+    return vlm.describe_video(video_path=video, frame_interval=frame_interval)
+
+def generate_video_prompt_wrapper(
+    concept, style, camera_style, camera_direction,
+    pacing, special_effects, custom_elements,
+    provider, model, prompt_length
+):
+    """Wrapper for LLM prompt generation"""
+    node = get_llm_node()
+    return node.generate_video_prompt(
+        concept, style, camera_style, camera_direction,
+        pacing, special_effects, custom_elements,
+        provider, model, prompt_length
+    )
+
+def create_video_interface():
     with gr.Blocks(theme='bethecloud/storj_theme') as demo:
         gr.HTML(title)
 
@@ -128,7 +178,7 @@ def create_video_interface():
         provider.change(update_models, inputs=provider, outputs=model)
 
         generate_btn.click(
-            llm_node.generate_video_prompt,
+            generate_video_prompt_wrapper,
             inputs=[input_concept, style, camera_style, camera_direction, pacing, special_effects,
                     custom_elements, provider, model, prompt_length],
             outputs=output
@@ -151,45 +201,22 @@ def create_video_interface():
         analyze_video_btn = gr.Button("Analyze Video")
         video_output = gr.Textbox(label="Video Analysis", lines=10)
 
-        # Use direct function calls to avoid serialization issues
+        # Use GPU-decorated wrapper functions directly
         analyze_image_btn.click(
-            describe_image_interface,
+            describe_image_wrapper,
            inputs=[image_input, image_question],
            outputs=image_output
        )
 
        analyze_video_btn.click(
-            describe_video_interface,
+            describe_video_wrapper,
            inputs=video_input,
            outputs=video_output
        )
 
    return demo
 
-# Define these functions at the module level to avoid pickling issues
-def describe_image_interface(image, question="Describe this image in detail."):
-    """Interface function for image description"""
-    if image is None:
-        return "Please upload an image."
-
-    if not question or question.strip() == "":
-        question = "Describe this image in detail."
-
-    return vlm_captioner.describe_image(
-        image=image,
-        question=question
-    )
-
-def describe_video_interface(video, frame_interval=30):
-    """Interface function for video description"""
-    if video is None:
-        return "Please upload a video."
-
-    return vlm_captioner.describe_video(
-        video_path=video,
-        frame_interval=frame_interval
-    )
-
 if __name__ == "__main__":
     demo = create_video_interface()
-    demo.launch(share=True)
+    # Don't use share=True on Hugging Face Spaces
+    demo.launch()
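
Note: the core pattern in the new app.py is worth isolating. Module-level singletons stay None until a getter builds them on first use, and the heavy VLM work runs inside @spaces.GPU()-decorated wrappers, so the loaded model itself never has to be serialized across the process boundary that ZeroGPU introduces. Below is a minimal, self-contained sketch of that shape; HeavyModel is a hypothetical stand-in for VLMCaptioning, and the try/except shim only exists so the sketch also runs outside Spaces:

# Minimal sketch of the lazy-singleton + GPU-decorator pattern used above.
# HeavyModel is a hypothetical stand-in for VLMCaptioning.
try:
    import spaces  # provided on Hugging Face ZeroGPU Spaces
except ImportError:  # local fallback: make the decorator a no-op
    class spaces:
        @staticmethod
        def GPU(*d_args, **d_kwargs):
            return lambda fn: fn

class HeavyModel:
    """Hypothetical stand-in for a large vision-language model."""
    def describe(self, item):
        return f"description of {item}"

_model = None  # module-level singleton, created on first use

def get_model():
    global _model
    if _model is None:
        _model = HeavyModel()  # the expensive load happens exactly once
    return _model

@spaces.GPU()
def describe(item):
    # Acquire the model *inside* the decorated function: only small
    # inputs and outputs cross the GPU-worker boundary, never the model.
    return get_model().describe(item)

if __name__ == "__main__":
    print(describe("a sunset over the sea"))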
llm_inference_video.py CHANGED
@@ -7,29 +7,34 @@ import tempfile
 from PIL import Image
 from groq import Groq
 from openai import OpenAI
-from vlm_captions import VLMCaptioning
+import spaces
 
 class VideoLLMInferenceNode:
-    def __init__(self, vlm_captioner=None):
+    def __init__(self):
         """
-        Initialize the VideoLLMInferenceNode
-
-        Args:
-            vlm_captioner: The already initialized VLMCaptioning instance to use
+        Initialize the VideoLLMInferenceNode without VLM captioning dependency
         """
-        self.vlm = vlm_captioner
         self.sambanova_api_key = os.environ.get("SAMBANOVA_API_KEY", "")
         self.groq_api_key = os.environ.get("GROQ_API_KEY", "")
 
-        self.groq_client = Groq(api_key=self.groq_api_key)
-        self.sambanova_client = OpenAI(
-            api_key=self.sambanova_api_key,
-            base_url="https://api.sambanova.ai/v1",
-        )
+        # Initialize API clients if keys are available
+        if self.groq_api_key:
+            self.groq_client = Groq(api_key=self.groq_api_key)
+        else:
+            self.groq_client = None
+
+        if self.sambanova_api_key:
+            self.sambanova_client = OpenAI(
+                api_key=self.sambanova_api_key,
+                base_url="https://api.sambanova.ai/v1",
+            )
+        else:
+            self.sambanova_client = None
 
+    @spaces.GPU()
     def analyze_image(self, image_path: str, question: Optional[str] = None) -> str:
         """
-        Analyze an image using the VLM model
+        Analyze an image using VLM model directly
 
         Args:
             image_path: Path to the image file
@@ -45,14 +50,17 @@ class VideoLLMInferenceNode:
             question = "Describe this image in detail."
 
         try:
-            # Use the passed vlm_captioner instance
-            return self.vlm.describe_image(image_path, question)
+            # Import and use VLMCaptioning within this GPU-scoped function
+            from app import get_vlm_captioner
+            vlm = get_vlm_captioner()
+            return vlm.describe_image(image_path, question)
         except Exception as e:
             return f"Error analyzing image: {str(e)}"
 
+    @spaces.GPU()
     def analyze_video(self, video_path: str) -> str:
         """
-        Analyze a video using the VLM model
+        Analyze a video using VLM model directly
 
         Args:
             video_path: Path to the video file
@@ -64,8 +72,10 @@ class VideoLLMInferenceNode:
             return "Please upload a video."
 
         try:
-            # Use the passed vlm_captioner instance
-            return self.vlm.describe_video(video_path)
+            # Import and use VLMCaptioning within this GPU-scoped function
+            from app import get_vlm_captioner
+            vlm = get_vlm_captioner()
+            return vlm.describe_video(video_path)
         except Exception as e:
             return f"Error analyzing video: {str(e)}"
 
@@ -147,16 +157,36 @@ The prompt should be detailed and technical, specifically mentioning camera angl
         # Call the appropriate API based on provider
         try:
             if provider == "SambaNova":
-                return self._call_sambanova_api(system_message, user_message, model)
+                if self.sambanova_client:
+                    return self._call_sambanova_client(system_message, user_message, model)
+                else:
+                    return self._call_sambanova_api(system_message, user_message, model)
             elif provider == "Groq":
-                return self._call_groq_api(system_message, user_message, model)
+                if self.groq_client:
+                    return self._call_groq_client(system_message, user_message, model)
+                else:
+                    return self._call_groq_api(system_message, user_message, model)
             else:
                 return "Unsupported provider. Please select SambaNova or Groq."
         except Exception as e:
             return f"Error generating prompt: {str(e)}"
 
+    def _call_sambanova_client(self, system_message: str, user_message: str, model: str) -> str:
+        """Call the SambaNova API using the client library"""
+        try:
+            chat_completion = self.sambanova_client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": system_message},
+                    {"role": "user", "content": user_message}
+                ]
+            )
+            return chat_completion.choices[0].message.content
+        except Exception as e:
+            return f"Error from SambaNova API: {str(e)}"
+
     def _call_sambanova_api(self, system_message: str, user_message: str, model: str) -> str:
-        """Call the SambaNova API for prompt generation"""
+        """Call the SambaNova API using direct HTTP requests"""
         if not self.sambanova_api_key:
             return "SambaNova API key not configured. Please set the SAMBANOVA_API_KEY environment variable."
 
@@ -182,8 +212,22 @@ The prompt should be detailed and technical, specifically mentioning camera angl
         else:
             return f"Error from SambaNova API: {response.status_code} - {response.text}"
 
+    def _call_groq_client(self, system_message: str, user_message: str, model: str) -> str:
+        """Call the Groq API using the client library"""
+        try:
+            chat_completion = self.groq_client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": system_message},
+                    {"role": "user", "content": user_message}
+                ]
+            )
+            return chat_completion.choices[0].message.content
+        except Exception as e:
+            return f"Error from Groq API: {str(e)}"
+
     def _call_groq_api(self, system_message: str, user_message: str, model: str) -> str:
-        """Call the Groq API for prompt generation"""
+        """Call the Groq API using direct HTTP requests"""
         if not self.groq_api_key:
             return "Groq API key not configured. Please set the GROQ_API_KEY environment variable."
233