avishkaaram-ekta-speech / avishkaaram_ekta_speech_model.py

Upload avishkaaram_ekta_speech_model.py

18bae2d about 2 years ago

3.21 kB

	from transformers import AutoTokenizer, AutoModelForQuestionAnswering
	import sounddevice as sd
	import soundfile as sf
	import speech_recognition as sr
	from gtts import gTTS
	import pygame

	# Checkpoint identifier handed to both from_pretrained loaders below.
	_CHECKPOINT = 'AVISHKAARAM/avishkaarak-ekta-hindi'

	# The question-answering model and its tokenizer come from the same checkpoint.
	model = AutoModelForQuestionAnswering.from_pretrained(_CHECKPOINT)
	tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

	class avishkaaram_ekta:
	    """Voice front end for extractive question answering over a typed passage.

	    Prompts are spoken with gTTS and played through pygame, the question is
	    recorded with sounddevice, transcribed with the Google recognizer of
	    ``speech_recognition``, and the answer span is selected by the
	    question-answering model handed to the constructor.
	    """

	    def __init__(self, model, tokenizer=None):
	        """Store the model and tokenizer used by ``answer_question``.

	        Args:
	            model: Question-answering model. It is called as
	                ``model(**inputs)`` and its result must expose
	                ``start_logits`` and ``end_logits``.
	            tokenizer: Tokenizer paired with ``model``. When omitted, the
	                module-level ``tokenizer`` is used, which is what the
	                one-argument constructor has always done.
	        """
	        if tokenizer is None:
	            # Legacy path: pick up the tokenizer loaded at module level.
	            tokenizer = globals()["tokenizer"]
	        self.model = model
	        self.tokenizer = tokenizer

	    def text_to_speech(self, text, output_file, lang='en'):
	        """Synthesize ``text`` with gTTS and save the audio to ``output_file``.

	        Args:
	            text: Sentence to speak.
	            output_file: Path of the audio file to write.
	            lang: Language code passed to gTTS (defaults to English).
	        """
	        gTTS(text=text, lang=lang).save(output_file)

	    def Passageready(self):
	        """Speak a prompt, then read the passage from standard input.

	        Returns:
	            The passage text typed by the user.
	        """
	        prompt_file = "passage.mp3"
	        self.text_to_speech("Give the passage you want to ask questions from", prompt_file)
	        # Non-blocking on purpose: input() keeps the process alive, so the
	        # prompt can keep playing while the user is already typing.
	        self.play_mp3(prompt_file, block=False)
	        return input("Enter the Passage: ")

	    def play_mp3(self, file_path, block=True):
	        """Play an audio file through the pygame mixer.

	        Args:
	            file_path: Path of the audio file to play.
	            block: When true (default), return only after playback has
	                finished. Without waiting, a prompt would still be playing
	                while the microphone records, and the final answer would be
	                cut off as soon as the script exits.
	        """
	        pygame.mixer.init()
	        pygame.mixer.music.load(file_path)
	        pygame.mixer.music.play()
	        if block:
	            # play() only starts playback; poll until the mixer goes idle.
	            while pygame.mixer.music.get_busy():
	                pygame.time.wait(100)

	    def askquestion(self, audio_file, output_file, duration=6, samplerate=44100):
	        """Record from the microphone and transcribe the spoken question.

	        The recording is written to ``output_file`` and the transcription is
	        read from ``audio_file``. The two paths are independent parameters;
	        pass the same path for both to transcribe what was just recorded
	        (``QuestionAnswer`` does exactly that).

	        Args:
	            audio_file: Path of the audio file handed to the recognizer.
	            output_file: Path the fresh recording is saved to.
	            duration: Recording length in seconds.
	            samplerate: Sampling rate in Hz used for recording and saving.

	        Returns:
	            The recognized text, or ``""`` when the audio could not be
	            understood or the recognition service could not be reached.
	        """
	        print("Recording audio...")
	        frames = sd.rec(int(samplerate * duration), samplerate=samplerate, channels=1)
	        sd.wait()  # rec() returns immediately; wait for the capture to end

	        sf.write(output_file, frames, samplerate)
	        print(f"Audio saved to {output_file}")

	        recognizer = sr.Recognizer()
	        with sr.AudioFile(audio_file) as source:
	            recorded = recognizer.record(source)

	        text = ""
	        try:
	            text = recognizer.recognize_google(recorded)
	            print("Transcription:", text)
	        except sr.UnknownValueError:
	            print("Speech recognition could not understand audio")
	        except sr.RequestError as e:
	            print("Could not request results from Google Speech Recognition service; {0}".format(e))

	        return text

	    def QuestionAnswer(self, passage):
	        """Run one spoken question/answer round against ``passage``.

	        Speaks a prompt, records and transcribes the question, extracts the
	        answer from the passage and reads the result back to the user.

	        Args:
	            passage: Text the answer is extracted from.

	        Returns:
	            The answer text, or ``""`` when the question could not be
	            transcribed or no answer span was found.
	        """
	        prompt_file = "Ask.mp3"
	        self.text_to_speech("Ask the question", prompt_file)
	        # Blocks until the prompt has been spoken so that it is not picked up
	        # by the recording that starts next.
	        self.play_mp3(prompt_file)
	        question = self.askquestion("question.wav", "question.wav")

	        if not question.strip():
	            # Nothing was transcribed: do not run the model on an empty query.
	            ans = ""
	            message = "Sorry, I could not understand the question"
	        else:
	            ans = self.answer_question(passage, question)
	            if ans:
	                message = "The Answer to the question: " + question + " is: " + ans + ":..."
	            else:
	                message = "No answer was found in the passage for the question: " + question

	        reply_file = "answer.mp3"
	        self.text_to_speech(message, reply_file)
	        self.play_mp3(reply_file)
	        return ans

	    def answer_question(self, passage, question):
	        """Extract the answer to ``question`` from ``passage``.

	        Args:
	            passage: Context text that should contain the answer.
	            question: Question to answer.

	        Returns:
	            The decoded answer span with special tokens removed, or ``""``
	            when the predicted end lies before the predicted start.
	        """
	        # Question first, context second, as in the Transformers
	        # question-answering examples; "only_second" then truncates only the
	        # passage when the pair is too long.
	        # NOTE(review): assumes the checkpoint was fine-tuned with this
	        # ordering - confirm against the model card.
	        inputs = self.tokenizer(
	            question,
	            passage,
	            return_tensors="pt",
	            truncation="only_second",
	        )
	        outputs = self.model(**inputs)
	        start_index = outputs.start_logits.argmax(dim=1).item()
	        end_index = outputs.end_logits.argmax(dim=1).item()
	        if end_index < start_index:
	            # Inverted span: there is nothing sensible to decode.
	            return ""
	        answer_ids = inputs["input_ids"][0][start_index:end_index + 1]
	        # skip_special_tokens keeps markers such as <s> out of the spoken answer.
	        return self.tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

	# Demo flow: build the assistant around the loaded model, collect a
	# passage from the user, then run one spoken question/answer round on it.
	my_instance = avishkaaram_ekta(model=model)
	passage = my_instance.Passageready()
	my_instance.QuestionAnswer(passage=passage)