# Chroma-Extra / app.py
import torch  # not used directly in this file; imported so a missing torch install fails fast at startup
import gradio as gr

from vlm_captions import VLMCaptioning

# Initialize the VLM captioning model once at startup so every request reuses
# the same loaded instance.
print("Initializing Video Prompt Generator...")
vlm_captioner = VLMCaptioning()
print("Video Prompt Generator initialized successfully!")

# Import VideoLLMInferenceNode only after VLMCaptioning has been initialized.
from llm_inference_video import VideoLLMInferenceNode

title = """<h1 align="center">AI Video Prompt Generator</h1>
<p align="center">Generate creative video prompts with technical specifications</p>
<p align="center">You can use prompts with Kling, MiniMax, Hunyuan, Haiper, CogVideoX, Luma, LTX, Runway, PixVerse. </p>"""


def create_video_interface():
    # Pass the already-initialized vlm_captioner so the inference node reuses the
    # loaded model instead of serializing or re-creating it.
    llm_node = VideoLLMInferenceNode(vlm_captioner)
with gr.Blocks(theme='bethecloud/storj_theme') as demo:
gr.HTML(title)
with gr.Tab("Video Prompt Generator"):
with gr.Row():
with gr.Column(scale=1):
input_concept = gr.Textbox(label="Core Concept/Thematic Input", lines=3)
style = gr.Dropdown(
choices=["Minimalist", "Simple", "Detailed", "Descriptive", "Dynamic",
"Cinematic", "Documentary", "Animation", "Action", "Experimental"],
value="Simple",
label="Video Style"
)
                    custom_elements = gr.Textbox(
                        label="Custom Technical Elements",
                        placeholder="e.g., Infrared hybrid, Datamosh transitions"
                    )
prompt_length = gr.Dropdown(
choices=["Short", "Medium", "Long"],
value="Medium",
label="Prompt Length"
)
with gr.Column(scale=1):
camera_direction = gr.Dropdown(
choices=[
"None",
"Zoom in", "Zoom out", "Pan left", "Pan right",
"Tilt up", "Tilt down", "Orbital rotation",
"Push in", "Pull out", "Track forward", "Track backward",
"Spiral in", "Spiral out", "Arc movement",
"Diagonal traverse", "Vertical rise", "Vertical descent"
],
value="None",
label="Camera Direction"
)
camera_style = gr.Dropdown(
choices=[
"None",
"Steadicam flow", "Drone aerials", "Handheld urgency", "Crane elegance",
"Dolly precision", "VR 360", "Multi-angle rig", "Static tripod",
"Gimbal smoothness", "Slider motion", "Jib sweep", "POV immersion",
"Time-slice array", "Macro extreme", "Tilt-shift miniature",
"Snorricam character", "Whip pan dynamics", "Dutch angle tension",
"Underwater housing", "Periscope lens"
],
value="None",
label="Camera Movement Style"
)
pacing = gr.Dropdown(
choices=[
"None",
"Slow burn", "Rhythmic pulse", "Frantic energy", "Ebb and flow",
"Hypnotic drift", "Time-lapse rush", "Stop-motion staccato",
"Gradual build", "Quick cut rhythm", "Long take meditation",
"Jump cut energy", "Match cut flow", "Cross-dissolve dreamscape",
"Parallel action", "Slow motion impact", "Ramping dynamics",
"Montage tempo", "Continuous flow", "Episodic breaks"
],
value="None",
label="Pacing Rhythm"
)
special_effects = gr.Dropdown(
choices=[
"None",
"Practical effects", "CGI enhancement", "Analog glitches",
"Light painting", "Projection mapping", "Nanosecond exposures",
"Double exposure", "Smoke diffusion", "Lens flare artistry",
"Particle systems", "Holographic overlay", "Chromatic aberration",
"Digital distortion", "Wire removal", "Motion capture",
"Miniature integration", "Weather simulation", "Color grading",
"Mixed media composite", "Neural style transfer"
],
value="None",
label="SFX Approach"
)
with gr.Column(scale=1):
provider = gr.Dropdown(
choices=["SambaNova", "Groq"],
value="SambaNova",
label="LLM Provider"
)
model = gr.Dropdown(
choices=[
"Meta-Llama-3.1-70B-Instruct",
"Meta-Llama-3.1-405B-Instruct",
"Meta-Llama-3.1-8B-Instruct"
],
value="Meta-Llama-3.1-70B-Instruct",
label="Model"
)
generate_btn = gr.Button("Generate Video Prompt", variant="primary")
output = gr.Textbox(label="Generated Prompt", lines=12, show_copy_button=True)
            def update_models(provider):
                """Swap the model dropdown's choices to match the selected provider."""
                models = {
                    "Groq": ["llama-3.3-70b-versatile"],
                    "SambaNova": [
                        "Meta-Llama-3.1-70B-Instruct",
                        "Meta-Llama-3.1-405B-Instruct",
                        "Meta-Llama-3.1-8B-Instruct"
                    ]
                }
                return gr.Dropdown(choices=models[provider], value=models[provider][0])
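            # Returning a fresh gr.Dropdown from the handler updates the existing
            # component's choices/value (the Gradio 4.x pattern; 3.x used
            # gr.Dropdown.update instead).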
provider.change(update_models, inputs=provider, outputs=model)
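            # Gradio passes `inputs` positionally, so the order below must match
            # the signature of VideoLLMInferenceNode.generate_video_prompt.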
generate_btn.click(
llm_node.generate_video_prompt,
inputs=[input_concept, style, camera_style, camera_direction, pacing, special_effects,
custom_elements, provider, model, prompt_length],
outputs=output
)
with gr.Tab("Visual Analysis"):
with gr.Row():
with gr.Column():
image_input = gr.Image(label="Upload Image", type="filepath")
image_question = gr.Textbox(
label="Question (optional)",
placeholder="What is in this image?"
)
analyze_image_btn = gr.Button("Analyze Image")
image_output = gr.Textbox(label="Analysis Result", lines=5)
with gr.Column():
video_input = gr.Video(label="Upload Video")
analyze_video_btn = gr.Button("Analyze Video")
video_output = gr.Textbox(label="Video Analysis", lines=10)
            # These handlers are module-level functions (defined below) rather than
            # bound methods, which avoids serialization/pickling issues in Gradio's
            # event handling.
analyze_image_btn.click(
describe_image_interface,
inputs=[image_input, image_question],
outputs=image_output
)
analyze_video_btn.click(
describe_video_interface,
inputs=video_input,
outputs=video_output
)
return demo


# These handlers live at module level (not inside create_video_interface) to
# avoid pickling issues when callbacks are serialized.
def describe_image_interface(image, question="Describe this image in detail."):
    """Describe an uploaded image with the VLM, optionally guided by a question."""
    if image is None:
        return "Please upload an image."
    if not question or not question.strip():
        question = "Describe this image in detail."
    return vlm_captioner.describe_image(image=image, question=question)
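
# Example (hypothetical path; assumes describe_image returns a plain string):
#   describe_image_interface("sample.jpg", "What objects are visible?")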


def describe_video_interface(video, frame_interval=30):
    """Describe an uploaded video with the VLM.

    frame_interval is forwarded to VLMCaptioning.describe_video; presumably it
    controls how sparsely frames are sampled (e.g., every Nth frame).
    """
    if video is None:
        return "Please upload a video."
    return vlm_captioner.describe_video(video_path=video, frame_interval=frame_interval)
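
# Example (hypothetical path):
#   describe_video_interface("clip.mp4", frame_interval=15)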


if __name__ == "__main__":
    demo = create_video_interface()
    # share=True additionally requests a temporary public *.gradio.live URL.
    demo.launch(share=True)