import pygame
import sounddevice as sd
import soundfile as sf
import speech_recognition as sr
import torch
from gtts import gTTS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering


# Hugging Face Hub checkpoint id used for both the tokenizer and the model.
MODEL_NAME = 'AVISHKAARAM/avishkaarak-ekta-hindi'

# Both objects are loaded at import time; the class below falls back on the
# module-level ``tokenizer`` name.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)


class avishkaaram_ekta:
    """Voice-driven extractive question answering over a typed passage.

    Prompts are synthesised with gTTS and played through pygame, the question
    is recorded with sounddevice, transcribed with Google Speech Recognition,
    and answered by a question-answering model paired with its tokenizer.
    """

    def __init__(self, model, tokenizer=None):
        """Store the QA model and tokenizer.

        Args:
            model: Callable question-answering model whose output exposes
                ``start_logits`` and ``end_logits``.
            tokenizer: Tokenizer matching ``model``. When omitted, the
                module-level ``tokenizer`` is used so existing
                ``avishkaaram_ekta(model)`` calls keep working.
        """
        self.model = model
        if tokenizer is None:
            # The parameter shadows the module-level name, hence globals().
            tokenizer = globals()["tokenizer"]
        self.tokenizer = tokenizer

    def text_to_speech(self, text, output_file, lang='en'):
        """Synthesise ``text`` with gTTS and save it to ``output_file``.

        Args:
            text: Text to speak.
            output_file: Path of the audio file to write.
            lang: gTTS language code; defaults to English.
        """
        tts = gTTS(text=text, lang=lang)
        tts.save(output_file)

    def Passageready(self):
        """Play a spoken prompt, then read the passage from standard input.

        Returns:
            The passage typed by the user.
        """
        prompt_file = "passage.mp3"
        self.text_to_speech("Give the passage you want to ask questions from", prompt_file)
        self.play_mp3(prompt_file)
        return input("Enter the Passage: ")

    def play_mp3(self, file_path, wait=True):
        """Play an audio file through the pygame mixer.

        Args:
            file_path: Path of the audio file to play.
            wait: When true (default), block until playback has finished.
                Without this the call returns immediately, so a prompt can
                overlap the microphone recording and the last clip is cut
                off when the program exits.
        """
        pygame.mixer.init()
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        if wait:
            while pygame.mixer.music.get_busy():
                pygame.time.wait(100)

    def askquestion(self, audio_file, output_file, duration=6, samplerate=44100,
                    language="en-US"):
        """Record a spoken question and return its transcription.

        The recording is written to ``output_file`` and the transcription is
        read from ``audio_file``; pass the same path for both to transcribe
        what was just recorded.

        Args:
            audio_file: Path of the audio file to transcribe.
            output_file: Path the fresh recording is written to.
            duration: Recording length in seconds.
            samplerate: Recording sample rate in Hz.
            language: Language tag passed to Google Speech Recognition.

        Returns:
            The recognised text, or ``""`` when the audio could not be
            understood or the recognition service could not be reached.
        """
        print("Recording audio...")
        recording = sd.rec(int(samplerate * duration), samplerate=samplerate, channels=1)
        sd.wait()  # block until the recording buffer is full

        sf.write(output_file, recording, samplerate)
        print(f"Audio saved to {output_file}")

        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            captured = recognizer.record(source)

        try:
            text = recognizer.recognize_google(captured, language=language)
        except sr.UnknownValueError:
            print("Speech recognition could not understand audio")
            return ""
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
            return ""

        print("Transcription:", text)
        return text

    def QuestionAnswer(self, passage):
        """Prompt for a spoken question, answer it from ``passage``, speak the answer.

        Args:
            passage: Text the answer is extracted from.

        Returns:
            The answer string, or ``""`` when no question was transcribed.
        """
        prompt_file = "Ask.mp3"
        self.text_to_speech("Ask the question", prompt_file)
        self.play_mp3(prompt_file)
        question = self.askquestion("question.wav", "question.wav")

        answer_file = "answer.mp3"
        if not question:
            # Transcription failed; running the model on an empty question
            # would only produce a meaningless span.
            self.text_to_speech("Sorry, I could not understand the question", answer_file)
            self.play_mp3(answer_file)
            return ""

        ans = self.answer_question(passage, question)
        # The question is repeated back so the spoken sentence is complete.
        self.text_to_speech(
            "The Answer to the question: " + question + " is: " + ans + ":...",
            answer_file,
        )
        self.play_mp3(answer_file)
        return ans

    def answer_question(self, passage, question):
        """Extract the answer span for ``question`` from ``passage``.

        Args:
            passage: Context text.
            question: Question text.

        Returns:
            The tokens between the highest-scoring start and end positions,
            joined into a string; ``""`` when the end precedes the start.
        """
        # NOTE(review): Hugging Face QA examples usually encode
        # (question, context); the original (passage, question) order is kept
        # here -- confirm against how the checkpoint was fine-tuned.
        encoded = self.tokenizer(passage, question, return_tensors="pt")

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            outputs = self.model(**encoded)

        start_index = outputs.start_logits.argmax(dim=1).item()
        end_index = outputs.end_logits.argmax(dim=1).item()
        if end_index < start_index:
            return ""

        tokens = self.tokenizer.convert_ids_to_tokens(encoded["input_ids"][0].tolist())
        return self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index + 1])


if __name__ == "__main__":
    # Guarded so that importing this module does not start the interactive
    # session (audio playback, stdin prompt, microphone recording).
    my_instance = avishkaaram_ekta(model)
    passage = my_instance.Passageready()
    my_instance.QuestionAnswer(passage)