Sergidev committed on
Commit
1eb7029
·
1 Parent(s): e8a8098

Initial Commit

Browse files
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from smolagents import CodeAgent, HfApiModel
3
+ from gradio_client import Client
4
+ import tempfile
5
+
6
# Backend model for the agent: Qwen2.5-Omni-7B served through the
# Hugging Face Inference API (HfApiModel proxies remote inference).
_omni_model = HfApiModel("Qwen/Qwen2.5-Omni-7B")

# Agent wrapper (SmolAgents) used by the Gradio handlers below.
qwen_agent = CodeAgent(
    model=_omni_model,
    tools=[],  # no extra tools registered yet; add multimodal tools as needed
    execution_timeout=120,  # presumably seconds before a run is aborted — TODO confirm
)
12
+
13
def process_video(video_path, prompt, request: gr.Request):
    """Analyze an uploaded video with Qwen2.5-Omni-7B and voice the answer.

    Args:
        video_path: Filesystem path of the uploaded video (from gr.Video).
        prompt: Free-text analysis instruction from the user.
        request: Injected automatically by Gradio from the type annotation;
            used to read the caller's ZeroGPU quota token.

    Returns:
        Tuple of (text analysis, audio response) for the two output components.
    """
    # ZeroGPU rate limiting: capture the caller's IP token so GPU quota is
    # attributed to the end user rather than to this Space.
    headers = {"X-IP-Token": request.headers.get("x-ip-token", "")}

    # BUG FIX: CodeAgent.run() accepts no `headers` keyword — passing one
    # raised TypeError on every call. The token in `headers` is currently
    # unused; TODO(review): forward it on the HTTP client the model uses.
    response = qwen_agent.run(
        f"Analyze this video: {video_path} and {prompt}"
    )

    # NOTE(review): generate_speech() is not a documented HfApiModel method —
    # confirm against the installed smolagents version before relying on it.
    audio_response = qwen_agent.model.generate_speech(response)

    return response, audio_response
27
+
28
# UI layout: video + prompt inputs on top, analyze button, then the
# text/audio outputs filled by process_video.
with gr.Blocks() as demo:
    gr.Markdown("## Multimodal AI Demo with Qwen2.5-Omni-7B")

    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        prompt_input = gr.Textbox(label="Analysis Prompt")

    submit_btn = gr.Button("Analyze")

    with gr.Column():
        text_output = gr.Textbox(label="Analysis Results")
        audio_output = gr.Audio(label="Voice Response", autoplay=True)

    # BUG FIX: gr.Request must NOT be listed as an input component — Gradio
    # injects it automatically from the handler's `request: gr.Request`
    # annotation. Passing gr.Request() here is invalid (it is not a
    # component) and broke the event wiring.
    submit_btn.click(
        process_video,
        inputs=[video_input, prompt_input],
        outputs=[text_output, audio_output],
    )

# ZeroGPU configuration: bound the number of concurrent queued GPU workers.
demo.queue(default_concurrency_limit=5)
demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
+ gradio_client>=3.2.0
+ smolagents>=0.9.0
+ qwen2.5-omni>=1.0.0