from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import sounddevice as sd
import soundfile as sf
import speech_recognition as sr
from gtts import gTTS
import pygame

# Hub identifier shared by the question-answering weights and their tokenizer;
# kept in one place so the two loads below cannot drift apart.
MODEL_NAME = 'AVISHKAARAM/avishkaarak-ekta-hindi'

# Both objects are loaded once, at import time, and reused by the code below.
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

class avishkaaram_ekta:
    """Voice-driven extractive question answering over a typed passage.

    The class glues together the libraries imported by this file: gTTS
    renders spoken prompts, pygame plays them, sounddevice/soundfile record
    the microphone, SpeechRecognition transcribes the recording, and a
    question-answering model picks the answer span out of the passage.
    """

    def __init__(self, model, tokenizer=None):
        """Store the model and tokenizer used by ``answer_question``.

        Args:
            model: Callable question-answering model; its output must expose
                ``start_logits`` and ``end_logits``.
            tokenizer: Tokenizer matching ``model``. When omitted, the
                module-level ``tokenizer`` is used, which is what this class
                always did before the parameter existed.
        """
        self.model = model
        if tokenizer is None:
            tokenizer = self._default_tokenizer()
        self.tokenizer = tokenizer

    @staticmethod
    def _default_tokenizer():
        """Return the module-level ``tokenizer`` (the historical default)."""
        return tokenizer

    def text_to_speech(self, text, output_file, lang='en'):
        """Synthesize ``text`` with gTTS and save the audio to ``output_file``.

        Args:
            text: Text to speak.
            output_file: Path the audio is written to.
            lang: Language code handed to gTTS; defaults to English.
        """
        tts = gTTS(text=text, lang=lang)
        tts.save(output_file)

    def Passageready(self):
        """Prompt for the passage (spoken and on stdin) and return it.

        Returns:
            The passage exactly as typed by the user.
        """
        output_file2 = "passage.mp3"
        self.text_to_speech("Give the passage you want to ask questions from", output_file2)
        self.play_mp3(output_file2)
        passage = input("Enter the Passage: ")
        return passage

    def play_mp3(self, file_path):
        """Play ``file_path`` through pygame and block until it finishes.

        ``pygame.mixer.music.play()`` only starts playback. Without waiting,
        the spoken prompt would still be running while the microphone is
        recording, and the final answer would be cut off when the script
        exits.

        Args:
            file_path: Path of the audio file to play.
        """
        pygame.mixer.init()
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        # Poll instead of spinning so the wait does not burn a CPU core.
        while pygame.mixer.music.get_busy():
            pygame.time.wait(100)

    def askquestion(self, audio_file, output_file, duration=6, samplerate=44100):
        """Record from the microphone and transcribe the question.

        The recording is written to ``output_file`` and the transcription is
        read from ``audio_file``; the caller in this file passes the same
        path for both so the fresh recording is what gets transcribed.

        Args:
            audio_file: Path of the audio file handed to the recognizer.
            output_file: Path the microphone recording is written to.
            duration: Recording length in seconds.
            samplerate: Recording sample rate in Hz.

        Returns:
            The transcribed text, or ``""`` when the audio could not be
            understood or the recognition request failed.
        """
        print("Recording audio...")
        frame_count = int(samplerate * duration)
        recording = sd.rec(frame_count, samplerate=samplerate, channels=1)
        sd.wait()  # block until the recording buffer is full

        sf.write(output_file, recording, samplerate)
        print(f"Audio saved to {output_file}")

        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            speech = recognizer.record(source)

        try:
            text = recognizer.recognize_google(speech)
        except sr.UnknownValueError:
            print("Speech recognition could not understand audio")
            return ""
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            return ""

        print("Transcription:", text)
        return text

    def QuestionAnswer(self, passage):
        """Ask for a spoken question, answer it from ``passage``, speak the answer.

        Args:
            passage: Text the answer is extracted from.

        Returns:
            The extracted answer, or ``""`` when no question could be
            transcribed (the model is not run in that case).
        """
        output_file = "Ask.mp3"
        self.text_to_speech("Ask the question", output_file)
        self.play_mp3(output_file)

        question = self.askquestion("question.wav", "question.wav")
        if not question:
            # askquestion already reported why transcription failed; running
            # the model on an empty question would only produce noise.
            return ""

        ans = self.answer_question(passage, question)

        output_file3 = "answer.mp3"
        # The question is repeated back so the spoken sentence is complete.
        self.text_to_speech(
            "The Answer to the question: " + question + " is: " + ans + ":...",
            output_file3,
        )
        self.play_mp3(output_file3)
        return ans

    def answer_question(self, passage, question):
        """Return the answer span the model predicts for ``question``.

        Args:
            passage: Context text to extract the answer from.
            question: Question text.

        Returns:
            The decoded tokens between the highest-scoring start and end
            positions, or ``""`` when the predicted end precedes the start.
        """
        # NOTE(review): Hugging Face QA examples pass the question first and
        # the context second; the original (passage, question) order is kept
        # here - confirm against how this checkpoint was fine-tuned.
        # truncation=True keeps over-long passages within the tokenizer's
        # configured maximum length instead of overflowing the model.
        inputs = self.tokenizer(passage, question, return_tensors="pt", truncation=True)
        outputs = self.model(**inputs)

        start_index = outputs.start_logits.argmax(dim=1).item()
        end_index = outputs.end_logits.argmax(dim=1).item()
        if end_index < start_index:
            # Start and end are predicted independently, so the span can be
            # inverted; that means no usable answer.
            return ""

        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        return self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index + 1])

if __name__ == "__main__":
    # Run the interactive session only when executed as a script, so that
    # importing this module does not start recording from the microphone or
    # block on stdin.
    my_instance = avishkaaram_ekta(model)
    passage = my_instance.Passageready()
    my_instance.QuestionAnswer(passage)