import os
import tempfile

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PIL import Image
from transformers import pipeline

# Directory for temporarily storing extracted figures
FIGURES_DIR = tempfile.mkdtemp(prefix="figures_")

# Configure Hugging Face
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Initialize embeddings; the vector store is built lazily once a PDF is uploaded
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = None

# Initialize image-captioning pipeline (used when figures are extracted from the PDF)
captioner = pipeline(
    "image-to-text",
    model="Salesforce/blip2-flan-t5-xl",
    use_auth_token=HUGGINGFACEHUB_API_TOKEN,
)

# Initialize LLM for QA.
# Note: the HF Inference API rejects temperature=0, so use a small positive value.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 0.1, "max_length": 256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)


# Helper functions
def process_pdf(pdf_file):
    # Gradio may hand us a tempfile wrapper or a plain path, depending on version
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file

    # Load text content from the PDF
    loader = UnstructuredPDFLoader(pdf_path)
    docs = loader.load()
    raw_text = "\n".join(d.page_content for d in docs)

    # Optionally extract embedded images and caption them.
    # (In a real pipeline, extract and save images separately; see the
    # sketch at the end of this file. For this demo we skip extraction.)
    captions = []

    # Combine text and captions
    return raw_text + "\n\n" + "\n".join(captions)


def build_index(text):
    global vector_store
    # Split into overlapping chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    # Create or replace the FAISS index
    vector_store = FAISS.from_texts(chunks, embeddings)


def answer_query(query):
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    return qa.run(query)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal RAG QA App")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        question_input = gr.Textbox(label="Ask a question", placeholder="Enter your question here...")
    output = gr.Textbox(label="Answer", interactive=False)

    def on_submit(pdf, question):
        if pdf is not None:
            text = process_pdf(pdf)
            build_index(text)
        if vector_store is None:
            return "Please upload a PDF first."
        if not question:
            return "Please enter a question."
        return answer_query(question)

    submit_btn = gr.Button("Get Answer")
    submit_btn.click(on_submit, inputs=[pdf_input, question_input], outputs=output)

if __name__ == "__main__":
    demo.launch()
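
# --- Appendix (illustrative, not executed by the app above) -----------------
# A minimal sketch of the image-extraction-and-captioning step that
# process_pdf skips. Assumptions: PyMuPDF is installed (pip install pymupdf,
# imported as fitz), and the helper name extract_and_caption_images is ours,
# not from any library. A full pipeline would call it inside process_pdf and
# append the returned captions to the text before indexing.

def extract_and_caption_images(pdf_path):
    import fitz  # PyMuPDF; imported lazily so the demo runs without it

    captions = []
    doc = fitz.open(pdf_path)
    for page_num, page in enumerate(doc):
        for img_index, img in enumerate(page.get_images(full=True)):
            xref = img[0]  # cross-reference number of the embedded image
            pix = fitz.Pixmap(doc, xref)
            # Convert CMYK and other multi-channel images to RGB before saving
            if pix.n - pix.alpha >= 4:
                pix = fitz.Pixmap(fitz.csRGB, pix)
            img_path = os.path.join(FIGURES_DIR, f"page{page_num}_img{img_index}.png")
            pix.save(img_path)
            # Caption with the BLIP-2 pipeline initialized above
            result = captioner(Image.open(img_path))
            captions.append(result[0]["generated_text"])
    return captions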