# Chroma-Extra / llm_inference_video.py
# Last commit: "fix models" (958e293) by gokaygokay; file size 9.38 kB.
import os
import random
from groq import Groq
from openai import OpenAI
from gradio_client import Client
class VideoLLMInferenceNode:
    """Generate a written video description from a concept and style options.

    A Groq client and a SambaNova client (the latter via the OpenAI SDK pointed
    at SambaNova's base URL) are created at construction time from the
    ``GROQ_API_KEY`` and ``SAMBANOVA_API_KEY`` environment variables.
    ``generate_video_prompt`` picks one of them per call and drives it through
    the same ``chat.completions.create`` interface.
    """

    # Template used when the requested style is not one of the known templates.
    _DEFAULT_STYLE = "simple"
    # Length preset used when the requested prompt length is not recognised.
    _DEFAULT_LENGTH = "Medium"
    # Model used per provider when the caller does not pass ``model``.
    _GROQ_MODEL = "llama-3.3-70b-versatile"
    _SAMBANOVA_MODEL = "Meta-Llama-3.1-70B-Instruct"

    # Connecting word placed in front of each optional element.
    _ELEMENT_PREFIXES = {
        "camera": "utilizing",
        "direction": "with",
        "pacing": "with",
        "effects": "incorporating",
    }

    # Sentence-count guidance injected into the system message per length preset.
    _LENGTH_CONFIG = {
        "Short": {
            "guidance": "Create exactly very short, ONE impactful sentence that captures the essence of the video. Be concise but descriptive.",
            "structure": "Combine all elements into a single, powerful sentence.",
        },
        "Medium": {
            "guidance": "Create 2-3 flowing sentences that paint a picture of the video.",
            "structure": "First sentence should set the scene, followed by 1-2 sentences developing the concept.",
        },
        "Long": {
            "guidance": "Create 4-5 detailed sentences that thoroughly describe the video.",
            "structure": "Begin with the setting, develop the action/movement, and conclude with impact.",
        },
    }

    def __init__(self):
        """Read both API keys from the environment and build both clients."""
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.sambanova_api_key = os.getenv("SAMBANOVA_API_KEY")
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.sambanova_client = OpenAI(
            api_key=self.sambanova_api_key,
            base_url="https://api.sambanova.ai/v1",
        )

    @staticmethod
    def _format_element(element, element_type):
        """Return ``" <prefix> <element>"``, or ``""`` when the element is unset."""
        if not element or element == "None":
            return ""
        prefix = VideoLLMInferenceNode._ELEMENT_PREFIXES.get(element_type, "")
        return f" {prefix} {element}"

    @staticmethod
    def _combine_camera_movement(camera_style, camera_direction):
        """Join the camera style and direction that are actually set.

        A value counts as unset when it is empty, ``None`` or the string
        ``"None"`` -- the same rule ``_format_element`` applies -- so unset
        inputs never leak into the prompt as ``" "`` or ``"None None"``.
        """
        chosen = [
            str(part)
            for part in (camera_style, camera_direction)
            if part and part != "None"
        ]
        return " ".join(chosen)

    @staticmethod
    def _build_prompt_templates(
        input_concept,
        style,
        camera_style,
        camera_movement,
        pacing,
        special_effects,
        custom_elements,
    ):
        """Return every style template, keyed by lower-case style name.

        NOTE(review): only the "minimalist" template drops unset elements.
        The other templates interpolate the raw option values, so an option
        left at "None" appears as the literal text "None" in the prompt.
        """
        fmt = VideoLLMInferenceNode._format_element

        def custom_or(fallback):
            # Caller-supplied custom elements win over the per-style filler.
            return custom_elements if custom_elements else fallback

        minimalist_tail = " with " + custom_elements if custom_elements else ""

        return {
            "minimalist": (
                f"Create an elegantly sparse video description focusing on {input_concept}.\n"
                f"{fmt(camera_movement, 'camera')}\n"
                f"{fmt(pacing, 'pacing')}\n"
                f"{fmt(special_effects, 'effects')}\n"
                f"{minimalist_tail}."
            ),
            "dynamic": (
                f"Craft an energetic, fast-paced paragraph showcasing {input_concept} in constant motion. "
                f"Utilize bold {camera_style} movements and {pacing} rhythm to create momentum. "
                f"Layer {special_effects} effects and {custom_or('powerful visual elements')} "
                "to maintain high energy throughout."
            ),
            "simple": (
                f"Create a straightforward, easy-to-understand paragraph describing a video about {input_concept}. "
                f"Use {camera_style} camera work and {pacing} pacing. "
                f"Keep the visuals clear and uncomplicated, incorporating {special_effects} effects "
                f"and {custom_or('basic visual elements')} in an accessible way."
            ),
            "detailed": (
                f"Construct a meticulous, technically precise paragraph outlining a video about {input_concept}. "
                f"Incorporate specific details about {camera_style} cinematography, {pacing} timing, "
                f"and {special_effects} effects. "
                f"Include {custom_or('precise technical elements')} while maintaining clarity and depth."
            ),
            "descriptive": (
                f"Write a richly descriptive paragraph for a video exploring {input_concept}. "
                f"Paint a vivid picture using sensory details, incorporating {camera_style} movement, "
                f"{pacing} flow, and {special_effects} effects. "
                f"Emphasize texture, color, and atmosphere, enhanced by {custom_or('evocative visual elements')}."
            ),
            "cinematic": (
                f"Create a single, detailed paragraph describing a cinematic video that captures {input_concept}. "
                f"Focus on creating a cohesive narrative that incorporates {style} visual aesthetics, "
                f"{camera_style} camera work, {pacing} pacing, and {special_effects} effects. "
                f"Include atmospheric elements like {custom_or('mood lighting and environmental details')} "
                "to enhance the storytelling. "
                "Describe the visual journey without technical timestamps or shot lists."
            ),
            "documentary": (
                f"Write a comprehensive paragraph for a documentary-style video exploring {input_concept}. "
                f"Blend observational footage with {camera_style} cinematography, "
                f"incorporating {pacing} editorial rhythm and {special_effects} visual treatments. "
                "Focus on creating an immersive narrative that educates and engages, "
                f"enhanced by {custom_or('authentic moments and natural lighting')}."
            ),
            "animation": (
                f"Compose a vivid paragraph describing a {style} animated video showcasing {input_concept}. "
                "Detail the unique visual style, character movements, and world-building elements, "
                f"incorporating {camera_style} perspectives and {pacing} story flow. "
                f"Include {special_effects} animation effects and {custom_or('signature artistic elements')} "
                "to create a memorable visual experience."
            ),
            "action": (
                f"Craft an energetic paragraph describing an action sequence centered on {input_concept}. "
                f"Emphasize the dynamic flow of action using {camera_style} cinematography, "
                f"{pacing} rhythm, and {special_effects} visual effects. "
                f"Incorporate {style} stylistic choices and {custom_or('impactful moments')} "
                "to create an adrenaline-pumping experience."
            ),
            "experimental": (
                f"Create an avant-garde paragraph describing an experimental video exploring {input_concept}. "
                f"Embrace unconventional storytelling through {style} aesthetics, "
                f"{camera_style} techniques, and {pacing} temporal flow. "
                f"Incorporate {special_effects} digital manipulations and {custom_or('abstract visual metaphors')} "
                "to challenge traditional narrative structures."
            ),
        }

    @staticmethod
    def _build_system_message(config):
        """Return the system message for one length preset (guidance + structure)."""
        return (
            "You are a visionary video director and creative storyteller. "
            f"{config['guidance']}\n"
            f"Structure: {config['structure']}\n"
            "Focus on these elements while maintaining the specified sentence count:\n"
            "1. Visual atmosphere and mood\n"
            "2. Camera movement and cinematography\n"
            "3. Narrative flow\n"
            "4. Style and aesthetic choices\n"
            "5. Key moments\n"
            "6. Emotional impact\n"
            "IMPORTANT REQUIREMENTS:\n"
            "- Deliver exactly the specified number of sentences\n"
            "- Short: ONE sentence\n"
            "- Medium: TWO to THREE sentences\n"
            "- Long: FOUR to FIVE sentences\n"
            "- If camera movements are specified, you MUST incorporate them into the description\n"
            "- Keep everything in a single paragraph format\n"
            "- Avoid technical specifications or shot lists\n"
            "- Avoid starting with 'The video opens with...' or 'The video starts with...'"
        )

    def generate_video_prompt(
        self,
        input_concept,
        style,
        camera_style,
        camera_direction,
        pacing,
        special_effects,
        custom_elements,
        provider="SambaNova",
        model=None,
        prompt_length="Medium",
    ):
        """Ask the selected provider for a video description.

        Args:
            input_concept: Core idea the video should convey.
            style: Template name, matched case-insensitively; an unknown or
                missing style falls back to the "simple" template with a
                printed warning.
            camera_style: Camera style; empty, ``None`` or "None" means unset.
            camera_direction: Camera direction; same "unset" rule.
            pacing: Pacing description interpolated into the template.
            special_effects: Effects description interpolated into the template.
            custom_elements: Optional extra elements; a per-style filler phrase
                is used when this is empty.
            provider: "Groq" selects the Groq client; any other value selects
                the SambaNova client.
            model: Model name; defaults to the provider's standard model.
            prompt_length: "Short", "Medium" or "Long"; an unknown value falls
                back to "Medium" with a printed warning.

        Returns:
            The stripped model reply, or a string beginning with
            "Error generating video prompt: " if anything raised.
        """
        try:
            camera_movement = self._combine_camera_movement(
                camera_style, camera_direction
            )
            prompt_templates = self._build_prompt_templates(
                input_concept,
                style,
                camera_style,
                camera_movement,
                pacing,
                special_effects,
                custom_elements,
            )

            # Resolve the template, tolerating a missing style.
            default_style = self._DEFAULT_STYLE
            selected_style = str(style or "").lower()
            if selected_style not in prompt_templates:
                print(f"Warning: Style '{style}' not found, using '{default_style}' template")
                selected_style = default_style

            # Resolve the length preset the same forgiving way as the style,
            # instead of letting a KeyError surface as an error string.
            length_key = prompt_length
            if length_key not in self._LENGTH_CONFIG:
                print(
                    f"Warning: Prompt length '{prompt_length}' not found, "
                    f"using '{self._DEFAULT_LENGTH}' preset"
                )
                length_key = self._DEFAULT_LENGTH

            system_message = self._build_system_message(self._LENGTH_CONFIG[length_key])
            movement_text = camera_movement if camera_movement else "No specific camera movement"
            user_prompt = (
                f"Style Guide: {selected_style.capitalize()} Style\n"
                f"{prompt_templates[selected_style]}\n"
                f"Camera Movement: {movement_text}\n"
                f"Core Concept: {input_concept}\n"
                f"Please create a {length_key.lower()}-length description "
                "incorporating these elements into a cohesive narrative."
            )

            # Anything other than "Groq" is routed to SambaNova.
            if provider == "Groq":
                client = self.groq_client
                model = model or self._GROQ_MODEL
            else:
                client = self.sambanova_client
                model = model or self._SAMBANOVA_MODEL

            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=1.2,
                top_p=0.95,
                # A fresh seed is drawn for every request.
                seed=random.randint(0, 10000),
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Callers receive the failure as text rather than an exception.
            return f"Error generating video prompt: {str(e)}"