Spaces:

Manishkumaryadav
/

smart-document-explorer

Runtime error

File size: 2,138 Bytes

ab28335
90462dd
ccf7c37
 
 
90462dd
 
ccf7c37
 
ab28335
ccf7c37
 
7228198
90462dd
ccf7c37
 
90462dd
ccf7c37
 
90462dd
ccf7c37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab28335
ccf7c37
 
ab28335
ccf7c37
 
 
 
 
 
 
a716036
 
ccf7c37

import gradio as gr
import os
import spacy
import torch
from transformers import pipeline
import speech_recognition as sr
from gtts import gTTS
import tempfile
import base64

# Make sure the spaCy English model is available, then load it.
# The download is skipped when the model package is already installed, so a
# restart does not re-run the installer every time. It goes through spaCy's
# own download helper instead of shelling out to whichever `python`
# executable happens to be first on PATH.
SPACY_MODEL_NAME = "en_core_web_sm"
if not spacy.util.is_package(SPACY_MODEL_NAME):
    spacy.cli.download(SPACY_MODEL_NAME)
nlp = spacy.load(SPACY_MODEL_NAME)

# Load the Hugging Face text-generation pipeline used for chat replies
# (BLOOM 560M here; another text-generation model id can be substituted).
chat_model = pipeline("text-generation", model="bigscience/bloom-560m")

# Speech-to-Text function
def transcribe_audio(audio_path):
    """Transcribe an audio file to text using ``recognize_google``.

    Args:
        audio_path: Path of the audio file to read with ``sr.AudioFile``.

    Returns:
        The recognised text. If the speech cannot be understood, or the
        recognition service cannot be reached, a human-readable message is
        returned instead of raising.
    """
    recognizer = sr.Recognizer()

    # Keep the file open only while its samples are read; the recognition
    # request below does not need the file handle.
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError as exc:
        # Network/API failures would otherwise crash the request handler.
        return f"Speech recognition service is unavailable: {exc}"

# AI Chat Response
def chat_with_ai(user_input):
    """Generate a reply to ``user_input`` with the module-level ``chat_model``.

    Args:
        user_input: Prompt text for the text-generation pipeline.

    Returns:
        The generated continuation as a string, or an empty string when the
        prompt is ``None``, empty, or only whitespace.
    """
    # Nothing to continue from: skip the model call entirely.
    if not user_input or not user_input.strip():
        return ""

    response = chat_model(
        user_input,
        # Budget the *generated* tokens. `max_length` also counts the prompt,
        # so a long prompt would leave no room for a reply (or be rejected).
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        # Return only the newly generated text instead of echoing the prompt.
        return_full_text=False,
    )
    return response[0]['generated_text']

# Text-to-Speech function
def generate_speech(text):
    """Synthesise ``text`` to speech with gTTS and return it base64-encoded.

    Args:
        text: English text to speak.

    Returns:
        The audio file contents written by ``gTTS.save``, base64-encoded as
        a UTF-8 string.
    """
    tts = gTTS(text=text, lang='en')

    # Reserve a unique path and close the handle straight away: gTTS writes
    # to the path by name, so the open handle is not needed and would
    # otherwise be leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_path = temp_file.name

    try:
        tts.save(temp_path)
        with open(temp_path, "rb") as audio_file:
            return base64.b64encode(audio_file.read()).decode("utf-8")
    finally:
        # Remove the temporary file even when synthesis or reading fails.
        os.unlink(temp_path)

# Chat Interface
def chat_interface(user_input, audio_file=None):
    """Handle one chat turn: optional speech input, AI reply, spoken reply.

    Args:
        user_input: Message typed by the user (may be empty or ``None``).
        audio_file: Optional path to a recorded/uploaded audio file. When
            given, its transcription replaces ``user_input``.

    Returns:
        A ``(reply_text, reply_audio_path)`` tuple. ``reply_audio_path`` is
        the path of an MP3 file holding the spoken reply, or ``None`` when
        there is nothing to speak.
    """
    if audio_file is not None:
        user_input = transcribe_audio(audio_file)

    # Neither a typed message nor usable audio: do not call the model.
    if not user_input or not user_input.strip():
        return "Please enter a message or provide an audio recording.", None

    ai_response = chat_with_ai(user_input)

    # An empty reply cannot be synthesised; return the text without audio.
    if not ai_response or not ai_response.strip():
        return ai_response, None

    encoded_audio = generate_speech(ai_response)

    # The audio output component takes a file path, not a `data:` URI, so the
    # decoded MP3 is written to a file. The file is deliberately kept
    # (delete=False) because it is read after this function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_out:
        audio_out.write(base64.b64decode(encoded_audio))

    return ai_response, audio_out.name

# Create Gradio UI
def _build_interface():
    """Assemble the Gradio interface that wires the UI to ``chat_interface``."""
    # Inputs: a typed message and an optional recorded/uploaded audio clip,
    # delivered to the handler as a file path.
    message_box = gr.Textbox(lines=2, placeholder="Type your message here...")
    voice_clip = gr.Audio(sources=["microphone", "upload"], type="filepath")

    # Outputs: the reply as text and as audio.
    reply_box = gr.Textbox(label="AI Response")
    reply_audio = gr.Audio(label="AI Voice Response")

    return gr.Interface(
        fn=chat_interface,
        inputs=[message_box, voice_clip],
        outputs=[reply_box, reply_audio],
        title="AI Chat Assistant",
        description="An AI-powered chat assistant with text & voice input/output.",
        theme="huggingface",
    )


gui = _build_interface()
gui.launch()