import gradio as gr
from gradio_rich_textbox import RichTextbox
from PIL import Image
from surya.ocr import run_ocr
from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
# from lang_list import TEXT_SOURCE_LANGUAGE_NAMES
from gradio_client import Client
from dotenv import load_dotenv
import requests
from io import BytesIO
import cohere
import os
import re
import pandas as pd

title = "# Welcome to AyaTonic"
description = "Learn a New Language With Aya"

# Load environment variables: Cohere API key and the SeamlessM4T
# Gradio-space identifier used for speech<->text conversion.
load_dotenv()
COHERE_API_KEY = os.getenv('CO_API_KEY')
SEAMLESSM4T = os.getenv('SEAMLESSM4T')

# Language names displayed in the two dropdowns (one row per language).
df = pd.read_csv("lang_list.csv")

inputlanguage = ""

# Prompt templates.
# `producetext` is a str.format template: it MUST be filled with
# `target_language` before being sent to the model (done in process_input).
producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"

# Regular expression patterns for each color.
# NOTE(review): all three patterns are identical and contain no tag markup —
# the surrounding rich-text tags appear to have been stripped at some point
# (e.g. something like '<font color="red">(.*?)</font>' was likely intended).
# Left unchanged; confirm the tag syntax before relying on extraction.
patterns = {
    "red": r'(.*?)',
    "blue": r'(.*?)',
    "green": r'(.*?)',
}

# Module-level scratch dict mirroring `patterns`; kept for backward
# compatibility (TaggedPhraseExtractor builds and returns its own matches).
matches = {
    "red": [],
    "blue": [],
    "green": [],
}


class TaggedPhraseExtractor:
    """Extract color-tagged phrases from a rich-text string.

    Patterns are registered per color with add_pattern(); extract_phrases()
    returns every regex match grouped by color.
    """

    def __init__(self, text=''):
        self.text = text       # text to search within
        self.patterns = {}     # color name -> regex pattern

    def set_text(self, text):
        """Set the text to search within."""
        self.text = text

    def add_pattern(self, color, pattern):
        """Add a new color and its associated regex pattern."""
        self.patterns[color] = pattern

    def extract_phrases(self):
        """Return {color: [matched phrases]} for all registered patterns."""
        return {color: re.findall(pattern, self.text)
                for color, pattern in self.patterns.items()}

    def print_phrases(self):
        """Extract phrases and print them, grouped by color."""
        for color, phrases in self.extract_phrases().items():
            print(f"Phrases with color {color}:")
            for phrase in phrases:
                print(f"- {phrase}")
            print()


# Shared service clients; the Gradio client is reused by both audio helpers
# instead of being reconstructed on every call.
co = cohere.Client(COHERE_API_KEY)
audio_client = Client(SEAMLESSM4T)
client = Client(SEAMLESSM4T)


def process_audio_to_text(audio_path, inputlanguage="English"):
    """Convert an audio file to text via the SeamlessM4T space (/s2tt).

    :param audio_path: filesystem path of the recorded audio.
    :param inputlanguage: language spoken in the recording.
    :return: the transcribed text (first element of the space's result).
    """
    result = audio_client.predict(
        audio_path,
        inputlanguage,
        inputlanguage,
        api_name="/s2tt"
    )
    print("Audio Result: ", result)
    return result[0]


def process_text_to_audio(text, translatefrom, translateto):
    """Convert text to speech via the SeamlessM4T space (/t2st).

    :param text: text to synthesize.
    :param translatefrom: language the text is written in.
    :param translateto: language the audio should be spoken in.
    :return: path of the generated audio (first element of the result).
    """
    result = audio_client.predict(
        text,
        translatefrom,
        translateto,
        api_name="/t2st"
    )
    return result[0]


class OCRProcessor:
    """Wraps surya detection + recognition models for image and PDF OCR."""

    def __init__(self, langs=None):
        # `langs=None` instead of a mutable default list; defaults to English.
        self.langs = ["en"] if langs is None else langs
        self.det_processor, self.det_model = load_det_processor(), load_det_model()
        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()

    def process_image(self, image):
        """Run OCR on a PIL image and return the first prediction object."""
        predictions = run_ocr([image], [self.langs], self.det_model,
                              self.det_processor, self.rec_model, self.rec_processor)
        return predictions[0]

    def process_pdf(self, pdf_path):
        """Run OCR on a PDF file path and return the first prediction object."""
        predictions = run_ocr([pdf_path], [self.langs], self.det_model,
                              self.det_processor, self.rec_model, self.rec_processor)
        return predictions[0]


def _ocr_prediction_to_text(ocr_prediction):
    """Flatten a surya OCR prediction into a single space-prefixed string.

    Replaces the index-juggling loop that was duplicated three times in
    process_input. NOTE(review): reproduces the original traversal exactly
    (list(prediction)[0][1] -> per-line entries -> [1][1] text field);
    confirm against the surya prediction schema.
    """
    entries = list(ocr_prediction)[0][1]
    return "".join(" " + list(entry)[1][1] for entry in entries)


def process_input(image=None, file=None, audio=None, text="",
                  translateto="English", translatefrom="English"):
    """Full pipeline behind the AyaTonic button.

    Gathers text from any combination of camera image, uploaded file
    (image or PDF), microphone audio, and typed text; asks Aya to write a
    blog post in the target language; asks command-nightly to add rich-text
    color tags; and synthesizes audio of the result.

    :param translateto: language the user wants to learn (output language).
    :param translatefrom: user's native language (input language).
    :return: (processed_text, audio_output_path)
    """
    ocr_processor = OCRProcessor()
    final_text = text

    if image is not None:
        final_text += _ocr_prediction_to_text(ocr_processor.process_image(image))

    if file is not None:
        if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
            pil_image = Image.open(file)
            final_text += _ocr_prediction_to_text(ocr_processor.process_image(pil_image))
        elif file.name.lower().endswith('.pdf'):
            final_text += _ocr_prediction_to_text(ocr_processor.process_pdf(file.name))
        else:
            final_text += "\nUnsupported file type."
    print("OCR Text: ", final_text)

    if audio is not None:
        # BUG FIX: pass the user's native language; previously the call
        # omitted it and always transcribed as "English".
        audio_text = process_audio_to_text(audio, translatefrom)
        final_text += "\n" + audio_text

    # BUG FIX: fill the {target_language} placeholder; previously the
    # template was concatenated unformatted and the literal placeholder
    # text was sent to the model.
    final_text_with_producetext = final_text + producetext.format(target_language=translateto)

    response = co.generate(
        model='c4ai-aya',
        prompt=final_text_with_producetext,
        max_tokens=1024,
        temperature=0.5
    )
    # TODO: add graceful handling for errors (context overflow, API failures)
    generated_text = response.generations[0].text
    print("Generated Text: ", generated_text)

    generated_text_with_format = generated_text + "\n" + formatinputstring
    response = co.generate(
        model='command-nightly',
        prompt=generated_text_with_format,
        max_tokens=4000,
        temperature=0.5
    )
    processed_text = response.generations[0].text

    # The generated post is already in the target language, so both the
    # source and target of the speech synthesis are `translateto`.
    audio_output = process_text_to_audio(processed_text, translateto, translateto)
    return processed_text, audio_output


def main():
    """Build and launch the Gradio UI."""
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        with gr.Row():
            input_language = gr.Dropdown(choices=df["name"].to_list(), label="Your Native Language")
            target_language = gr.Dropdown(choices=df["name"].to_list(), label="Language To Learn")
        with gr.Accordion("Talk To 🌟AyaTonic"):
            with gr.Tab("🤙🏻Audio & Text"):
                audio_input = gr.Audio(sources="microphone", type="filepath", label="Mic Input")
                text_input = gr.Textbox(lines=2, label="Text Input")
            with gr.Tab("📸Image & File"):
                image_input = gr.Image(type="pil", label="Camera Input")
                file_input = gr.File(label="File Upload")
        process_button = gr.Button("🌟AyaTonic")
        processed_text_output = RichTextbox(label="Processed Text")
        audio_output = gr.Audio(label="Audio Output")
        process_button.click(
            fn=process_input,
            # BUG FIX: the language dropdowns were swapped — the native
            # language was wired to `translateto` and the language to learn
            # to `translatefrom`. Order now matches process_input's
            # (translateto, translatefrom) parameters.
            inputs=[image_input, file_input, audio_input, text_input,
                    target_language, input_language],
            outputs=[processed_text_output, audio_output]
        )
    # BUG FIX: the interface was built but never served.
    demo.launch()


if __name__ == "__main__":
    main()