Sergidev committed on
Commit
1eb7029
·
1 Parent(s): e8a8098

Initial Commit

Browse files
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from smolagents import CodeAgent, HfApiModel
3
+ from gradio_client import Client
4
+ import tempfile
5
+
6
# Backend model for the agent: Qwen2.5-Omni-7B served through the
# Hugging Face Inference API (HfApiModel proxies remote inference).
_omni_model = HfApiModel("Qwen/Qwen2.5-Omni-7B")

# Agent wrapper (SmolAgents) used by the Gradio handlers below.
qwen_agent = CodeAgent(
    model=_omni_model,
    tools=[],  # no extra tools registered yet; add multimodal tools as needed
    execution_timeout=120,  # presumably seconds before a run is aborted — TODO confirm
)
12
+
13
def process_video(video_path, prompt, request: gr.Request):
    """Analyze an uploaded video with Qwen2.5-Omni-7B and voice the answer.

    Args:
        video_path: Filesystem path of the uploaded video (from gr.Video).
        prompt: Free-text analysis instruction from the user.
        request: Injected automatically by Gradio from the type annotation;
            used to read the caller's ZeroGPU quota token.

    Returns:
        Tuple of (text analysis, audio response) for the two output components.
    """
    # ZeroGPU rate limiting: capture the caller's IP token so GPU quota is
    # attributed to the end user rather than to this Space.
    headers = {"X-IP-Token": request.headers.get("x-ip-token", "")}

    # BUG FIX: CodeAgent.run() accepts no `headers` keyword — passing one
    # raised TypeError on every call. The token in `headers` is currently
    # unused; TODO(review): forward it on the HTTP client the model uses.
    response = qwen_agent.run(
        f"Analyze this video: {video_path} and {prompt}"
    )

    # NOTE(review): generate_speech() is not a documented HfApiModel method —
    # confirm against the installed smolagents version before relying on it.
    audio_response = qwen_agent.model.generate_speech(response)

    return response, audio_response
27
+
28
# UI layout: video + prompt inputs on top, analyze button, then the
# text/audio outputs filled by process_video.
with gr.Blocks() as demo:
    gr.Markdown("## Multimodal AI Demo with Qwen2.5-Omni-7B")

    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        prompt_input = gr.Textbox(label="Analysis Prompt")

    submit_btn = gr.Button("Analyze")

    with gr.Column():
        text_output = gr.Textbox(label="Analysis Results")
        audio_output = gr.Audio(label="Voice Response", autoplay=True)

    # BUG FIX: gr.Request must NOT be listed as an input component — Gradio
    # injects it automatically from the handler's `request: gr.Request`
    # annotation. Passing gr.Request() here is invalid (it is not a
    # component) and broke the event wiring.
    submit_btn.click(
        process_video,
        inputs=[video_input, prompt_input],
        outputs=[text_output, audio_output],
    )

# ZeroGPU configuration: bound the number of concurrent queued GPU workers.
demo.queue(default_concurrency_limit=5)
demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
+ gradio_client>=3.2.0
+ smolagents>=0.9.0
+ qwen2.5-omni>=1.0.0