phani50101 committed on
Commit f4c79df · 1 Parent(s): 0a7cc32

Add application file

Files changed (1)
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
+ import gradio as gr
+ import openvino_genai
+ import librosa
+ import numpy as np
+ from threading import Thread, Lock, Event
+ from scipy.ndimage import uniform_filter1d
+ from queue import Queue, Empty
+
+
+ # Initialize Mistral pipeline
+ mistral_pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
+ config = openvino_genai.GenerationConfig(
+     max_new_tokens=100,  # Increased for better context
+     num_beams=1,         # Keep greedy search for speed
+     do_sample=False,     # Disable sampling for faster generation
+     temperature=0.0,     # Set to 0 since sampling is disabled
+     top_p=1.0,           # Disable top-p filtering
+     top_k=50
+ )
+ pipe_lock = Lock()
+
+ # Initialize Whisper pipeline
+ whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")
+
+ def process_audio(data, sr):
+     """Audio processing with silence trimming"""
+     data = librosa.to_mono(data.T) if data.ndim > 1 else data
+     data = data.astype(np.float32)
+     data /= np.max(np.abs(data)) or 1.0  # normalize; guard against all-zero input
+
+     # Voice activity detection
+     frame_length, hop_length = 2048, 512
+     rms = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)[0]
+     smoothed_rms = uniform_filter1d(rms, size=5)
+     speech_frames = np.where(smoothed_rms > 0.025)[0]
+
+     if not speech_frames.size:
+         return None
+
+     start = max(0, int(speech_frames[0] * hop_length - 0.1 * sr))
+     end = min(len(data), int((speech_frames[-1] + 1) * hop_length + 0.1 * sr))
+     return data[start:end]
+
+ def transcribe(audio):
+     """Audio to text transcription"""
+     sr, data = audio
+     processed = process_audio(data, sr)
+     if processed is None or len(processed) < 1600:
+         return ""
+
+     if sr != 16000:
+         processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
+
+     return whisper_pipe.generate(processed)
+
+ def stream_generator(message, history):
+     """Stream Mistral responses token by token"""
+     response_queue = Queue()
+     completion_event = Event()
+     error_message = [None]
+
+     def callback(token):
+         response_queue.put(token)
+         return openvino_genai.StreamingStatus.RUNNING
+
+     def generate():
+         try:
+             with pipe_lock:
+
+                 mistral_pipe.generate(message, config, callback)
+
+         except Exception as e:
+             error_message[0] = str(e)
+         finally:
+             completion_event.set()
+
+     Thread(target=generate, daemon=True).start()
+
+     accumulated = []
+     while not completion_event.is_set() or not response_queue.empty():
+         if error_message[0]:
+             yield f"Error: {error_message[0]}"
+             return
+
+         try:
+             token = response_queue.get_nowait()
+             accumulated.append(token)
+             yield "".join(accumulated)
+         except Empty:
+             continue
+
+     yield "".join(accumulated)
+
+ # Create interface with added voice input
+ with gr.Blocks() as demo:
+     # Original chat interface
+     chat_interface = gr.ChatInterface(
+         stream_generator,
+         textbox=gr.Textbox(placeholder="Ask Mistral...", container=False),
+         title="EDU CHAT BY PHANINDRA REDDY K",
+         examples=[
+             "Explain quantum physics simply",
+             "Write a haiku about technology",
+             "What's the meaning of life?"
+         ],
+         cache_examples=False,
+     )
+
+     # Add voice input below examples
+     with gr.Row():
+         audio = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
+         transcribe_btn = gr.Button("Send Transcription")
+
+     # Connect transcription to chat input
+     transcribe_btn.click(
+         transcribe,
+         inputs=audio,
+         outputs=chat_interface.textbox
+     )
+
+ if __name__ == "__main__":
+     demo.launch(share=True, debug=True)
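
Note: app.py loads two local OpenVINO model folders, "mistral-ov" and "whisper-ov-model", which are not included in this commit and must exist next to app.py. A minimal sketch of how such folders are typically produced with optimum-cli; the exact Mistral and Whisper checkpoints used here are not stated in the commit, so the model IDs below are assumptions:

    optimum-cli export openvino --model mistralai/Mistral-7B-Instruct-v0.2 mistral-ov    # assumed LLM checkpoint
    optimum-cli export openvino --model openai/whisper-base whisper-ov-model             # assumed Whisper checkpoint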