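"""Voice-enabled chat demo for Hugging Face Spaces.

Combines an int4 OpenVINO build of Mistral-7B-Instruct for streaming chat
with an fp16 OpenVINO build of Whisper-tiny for microphone transcription,
served through a Gradio interface and running entirely on CPU.
"""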
from huggingface_hub import snapshot_download

# Download the OpenVINO-optimized models from Hugging Face to local folders
snapshot_download(
    repo_id="OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov",
    local_dir="mistral-ov",
)
snapshot_download(
    repo_id="OpenVINO/whisper-tiny-fp16-ov",
    local_dir="whisper-ov-model",
)
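# Note: snapshot_download() skips files that are already present and
# up to date, so relaunching the app does not re-fetch the weights.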
import gradio as gr
import openvino_genai
import librosa
import numpy as np
from threading import Thread, Lock, Event
from scipy.ndimage import uniform_filter1d
from queue import Queue, Empty
# Initialize the Mistral text-generation pipeline on CPU
mistral_pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
config = openvino_genai.GenerationConfig(
    max_new_tokens=100,
    num_beams=1,
    do_sample=False,  # greedy decoding; temperature/top_p/top_k are inert here
    temperature=0.0,
    top_p=1.0,
    top_k=50,
)
# Serialize access to the pipeline: generations must not run concurrently
pipe_lock = Lock()
# Initialize Whisper pipeline
whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")
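# whisper_pipe.generate() takes raw mono float32 samples at 16 kHz, so
# transcribe() below resamples microphone audio before passing it in.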
def process_audio(data, sr):
    """Convert to mono float32, normalize, and trim leading/trailing silence."""
    data = librosa.to_mono(data.T) if data.ndim > 1 else data
    data = data.astype(np.float32)
    peak = np.max(np.abs(data))
    if peak == 0:
        return None  # all-zero buffer: nothing to transcribe
    data /= peak
    # Simple energy-based voice activity detection on smoothed RMS
    frame_length, hop_length = 2048, 512
    rms = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)[0]
    smoothed_rms = uniform_filter1d(rms, size=5)
    speech_frames = np.where(smoothed_rms > 0.025)[0]
    if not speech_frames.size:
        return None
    # Keep ~0.1 s of padding on each side of the detected speech
    start = max(0, int(speech_frames[0] * hop_length - 0.1 * sr))
    end = min(len(data), int((speech_frames[-1] + 1) * hop_length + 0.1 * sr))
    return data[start:end]
def transcribe(audio):
    """Transcribe a Gradio (sample_rate, ndarray) tuple to text."""
    sr, data = audio
    processed = process_audio(data, sr)
    if processed is None:
        return ""
    # Whisper expects 16 kHz mono input
    if sr != 16000:
        processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
    if len(processed) < 1600:  # skip clips shorter than ~0.1 s at 16 kHz
        return ""
    # WhisperDecodedResults stringifies to the transcript text
    return str(whisper_pipe.generate(processed))
def stream_generator(message, history):
    """Stream Mistral tokens to the chat UI as they are generated."""
    response_queue = Queue()
    completion_event = Event()
    error_message = [None]

    def callback(token):
        # Called by the pipeline for each new token; hand it to the UI thread
        response_queue.put(token)
        return openvino_genai.StreamingStatus.RUNNING

    def generate():
        try:
            with pipe_lock:
                mistral_pipe.generate(message, config, callback)
        except Exception as e:
            error_message[0] = str(e)
        finally:
            completion_event.set()

    Thread(target=generate, daemon=True).start()

    accumulated = []
    while not completion_event.is_set() or not response_queue.empty():
        if error_message[0]:
            yield f"Error: {error_message[0]}"
            return
        try:
            # Block briefly instead of get_nowait() to avoid a busy-wait spin
            token = response_queue.get(timeout=0.05)
            accumulated.append(token)
            yield "".join(accumulated)
        except Empty:
            continue
    yield "".join(accumulated)
with gr.Blocks() as demo:
    chat_interface = gr.ChatInterface(
        stream_generator,
        textbox=gr.Textbox(placeholder="Ask Mistral...", container=False),
        title="EDU CHAT BY PHANINDRA REDDY K",
        examples=[
            "Explain quantum physics simply",
            "Write a haiku about technology",
            "What's the meaning of life?",
        ],
        cache_examples=False,
    )
    with gr.Row():
        audio = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
        transcribe_btn = gr.Button("Send Transcription")
    # Route the transcription into the chat textbox so the user can send it
    transcribe_btn.click(
        transcribe,
        inputs=audio,
        outputs=chat_interface.textbox,
    )
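# share=True exposes a temporary public gradio.live URL; debug=True keeps
# the process attached so errors surface in the console.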
if __name__ == "__main__":
    demo.launch(share=True, debug=True)