# Source: Hugging Face file view - author Sergidev, commit 1eb7029 ("Initial Commit"), 1.49 kB.
import gradio as gr
from smolagents import CodeAgent, HfApiModel
from gradio_client import Client
import tempfile
# Module-level agent shared by every request: a smolagents CodeAgent backed by
# the Qwen2.5-Omni-7B model served through the Hugging Face inference API.
_QWEN_MODEL_ID = "Qwen/Qwen2.5-Omni-7B"

# NOTE(review): `execution_timeout` is kept exactly as in the original call;
# confirm that the installed smolagents version accepts this keyword.
qwen_agent = CodeAgent(
    model=HfApiModel(_QWEN_MODEL_ID),
    tools=[],  # no tools registered yet; add multimodal tools here as needed
    execution_timeout=120,
)
def process_video(video_path, prompt, request: gr.Request):
    """Analyze an uploaded video with the Qwen agent and return text plus speech.

    Args:
        video_path: Value delivered by the ``gr.Video`` input (expected to be
            the path of the uploaded file); falsy when nothing was uploaded.
        prompt: Free-text instruction appended to the analysis task.
        request: Incoming request object. Gradio injects it automatically
            because the parameter is type-hinted as ``gr.Request``.

    Returns:
        A ``(response, audio_response)`` tuple: the agent's answer and the
        speech generated for it. ``audio_response`` is ``None`` when the
        model object exposes no callable ``generate_speech``.

    Raises:
        gr.Error: If no video was uploaded.
    """
    # Fail early with a user-visible message instead of sending the agent a
    # task that reads "Analyze this video: None".
    if not video_path:
        raise gr.Error("Please upload a video before running the analysis.")

    # Handle ZeroGPU rate limiting: pick up the caller's IP token so it can be
    # forwarded; fall back to an empty token when no request is available.
    ip_token = request.headers.get("x-ip-token", "") if request is not None else ""
    headers = {"X-IP-Token": ip_token}

    # `run()` has no `headers` keyword; extra values are handed to the agent
    # through `additional_args`.
    # NOTE(review): values in `additional_args` are exposed to the agent (and
    # may appear in the prompt) - confirm that forwarding the token this way
    # is acceptable, or route it to the component that performs the request.
    response = qwen_agent.run(
        f"Analyze this video: {video_path} and {prompt}",
        additional_args={"headers": headers},
    )

    # Generate the spoken response only if the model wrapper supports it, so
    # a model without speech synthesis still returns the text result.
    speech_fn = getattr(qwen_agent.model, "generate_speech", None)
    audio_response = speech_fn(response) if callable(speech_fn) else None
    return response, audio_response
# UI definition: a video upload plus a prompt on one row, an "Analyze" button,
# and a column with the textual result and the spoken response.
with gr.Blocks() as demo:
    gr.Markdown("## Multimodal AI Demo with Qwen2.5-Omni-7B")

    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        prompt_input = gr.Textbox(label="Analysis Prompt")

    submit_btn = gr.Button("Analyze")

    with gr.Column():
        text_output = gr.Textbox(label="Analysis Results")
        audio_output = gr.Audio(label="Voice Response", autoplay=True)

    # Only components belong in `inputs`. The `gr.Request` object is not a
    # component: Gradio passes it to `process_video` automatically because
    # that parameter is type-hinted as `gr.Request`.
    submit_btn.click(
        process_video,
        inputs=[video_input, prompt_input],
        outputs=[text_output, audio_output],
    )
# Enable the request queue (default concurrency limit of 5 per event), then
# serve the app on all interfaces at port 7860.
_LAUNCH_OPTIONS = {"server_name": "0.0.0.0", "server_port": 7860}
demo.queue(default_concurrency_limit=5).launch(**_LAUNCH_OPTIONS)