# avishkaaram-ekta-speech / avishkaaram_ekta_speech_model.py
# Uploaded by Shashwat2528 (commit 18bae2d: "Upload avishkaaram_ekta_speech_model.py")
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import sounddevice as sd
import soundfile as sf
import speech_recognition as sr
from gtts import gTTS
import pygame
# Hugging Face Hub id shared by the question-answering model and its tokenizer.
_CHECKPOINT = 'AVISHKAARAM/avishkaarak-ekta-hindi'

# Loaded once at import time; the class below reads both objects.
model = AutoModelForQuestionAnswering.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
class avishkaaram_ekta:
    """Voice-driven extractive question answering over a typed passage.

    Prompts are synthesised with gTTS and played through pygame, the question
    is recorded from the microphone with sounddevice, transcribed with Google
    Speech Recognition, and answered by an extractive QA model.
    """

    def __init__(self, model, tokenizer=None):
        """Store the QA model and its tokenizer.

        Args:
            model: Question-answering model. It is called as
                ``model(**inputs)`` and must expose ``start_logits`` and
                ``end_logits`` on the result.
            tokenizer: Tokenizer matching ``model``. When omitted, the
                module-level ``tokenizer`` is used (the previous behaviour).
        """
        self.model = model
        if tokenizer is None:
            # The parameter shadows the module-level name, so the default has
            # to be looked up explicitly.
            tokenizer = globals()["tokenizer"]
        self.tokenizer = tokenizer

    def text_to_speech(self, text, output_file, lang='en'):
        """Synthesise ``text`` with gTTS and save it to ``output_file``.

        Args:
            text: Text to speak.
            output_file: Path of the audio file to write.
            lang: gTTS language code; defaults to English as before.
        """
        tts = gTTS(text=text, lang=lang)
        tts.save(output_file)

    def Passageready(self):
        """Speak a prompt, then read the passage from standard input.

        Returns:
            The passage typed by the user.
        """
        prompt_file = "passage.mp3"
        self.text_to_speech("Give the passage you want to ask questions from", prompt_file)
        self.play_mp3(prompt_file)
        return input("Enter the Passage: ")

    def play_mp3(self, file_path):
        """Play an audio file and block until playback has finished.

        ``pygame.mixer.music.play()`` returns immediately. Without waiting,
        the microphone recording would start while the prompt is still
        audible, and the script would exit before the answer is heard.

        Args:
            file_path: Path of the audio file to play.
        """
        pygame.mixer.init()
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            pygame.time.wait(100)
        # Release the file so a later call can overwrite it. ``unload`` only
        # exists in pygame 2.0+, hence the guarded lookup.
        unload = getattr(pygame.mixer.music, "unload", None)
        if unload is not None:
            unload()

    def askquestion(self, audio_file, output_file, duration=6, samplerate=44100):
        """Record from the microphone and transcribe the spoken question.

        The recording is written to ``output_file`` and the transcription is
        read from ``audio_file``; callers normally pass the same path for
        both.

        Args:
            audio_file: Path of the audio file to transcribe.
            output_file: Path the microphone recording is saved to.
            duration: Recording length in seconds.
            samplerate: Recording sample rate in Hz.

        Returns:
            The transcribed text, or ``""`` when the speech could not be
            understood or the recognition service could not be reached.
        """
        print("Recording audio...")
        recording = sd.rec(int(samplerate * duration), samplerate=samplerate, channels=1)
        sd.wait()  # sd.rec() is asynchronous; wait for the buffer to fill
        sf.write(output_file, recording, samplerate)
        print(f"Audio saved to {output_file}")

        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)

        try:
            text = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            print("Speech recognition could not understand audio")
            return ""
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            return ""
        print("Transcription:", text)
        return text

    def QuestionAnswer(self, passage):
        """Run one spoken question/answer round against ``passage``.

        Args:
            passage: Text the answer is extracted from.

        Returns:
            The extracted answer, or ``""`` when the question was not
            understood or no answer span was found.
        """
        prompt_file = "Ask.mp3"
        self.text_to_speech("Ask the question", prompt_file)
        self.play_mp3(prompt_file)

        question = self.askquestion("question.wav", "question.wav")
        ans = self.answer_question(passage, question)

        if not question:
            spoken = "Sorry, I could not understand the question."
        elif not ans:
            spoken = "Sorry, I could not find an answer in the passage."
        else:
            # The original string omitted the question between its two halves.
            spoken = "The Answer to the question: " + question + " is: " + ans + ":..."

        answer_file = "answer.mp3"
        self.text_to_speech(spoken, answer_file)
        self.play_mp3(answer_file)
        return ans

    def answer_question(self, passage, question):
        """Extract the answer to ``question`` from ``passage``.

        Args:
            passage: Context text to search.
            question: Question text.

        Returns:
            The decoded answer span with special tokens removed, or ``""``
            when either input is blank or the predicted span is empty or
            inverted.
        """
        if not (passage and passage.strip() and question and question.strip()):
            return ""

        # Question first, context second, following the Hugging Face QA
        # examples. "only_second" truncates only the passage when the pair
        # exceeds the model's maximum length.
        # NOTE(review): assumes this checkpoint was fine-tuned question-first.
        inputs = self.tokenizer(
            question, passage, return_tensors="pt", truncation="only_second"
        )
        outputs = self.model(**inputs)
        start_index = outputs.start_logits.argmax(dim=1).item()
        end_index = outputs.end_logits.argmax(dim=1).item()
        if end_index < start_index:
            return ""

        answer_ids = inputs["input_ids"][0][start_index:end_index + 1]
        # Skipping special tokens turns a "no answer" prediction into "".
        return self.tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
# Script driver: build the assistant around the model loaded above.
my_instance = avishkaaram_ekta(model=model)

# Ask for the passage, then run one spoken question/answer round over it.
passage = my_instance.Passageready()
my_instance.QuestionAnswer(passage=passage)