import os
import zipfile

import gradio as gr
import torch
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer

# ZIP paths for the manual and problems indexes
zip_path_m = "faiss_manual_index.zip"
faiss_manual_index = "faiss_manual_index"
zip_path_p = "faiss_problems_index.zip"
faiss_problems_index = "faiss_problems_index"

# Extract the ZIP archives if the index directories are not already present
for zip_path, output_dir in [(zip_path_m, faiss_manual_index), (zip_path_p, faiss_problems_index)]:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        if os.path.exists(zip_path):
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(output_dir)

# Load the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/LaBSE")

# Load the FAISS vectorstores
manual_vectorstore = FAISS.load_local(faiss_manual_index, embedding_model, allow_dangerous_deserialization=True)
problems_vectorstore = FAISS.load_local(faiss_problems_index, embedding_model, allow_dangerous_deserialization=True)

# Load the GPT-J model from Hugging Face (GPU if available, otherwise CPU)
model_name = "EleutherAI/gpt-j-6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Search both indexes and summarize the combined results
def search_and_summarize(query):
    # Search the manual and problems vectorstores
    manual_results = manual_vectorstore.similarity_search(query, k=2)
    manual_output = "\n\n".join([doc.page_content for doc in manual_results])

    problems_results = problems_vectorstore.similarity_search(query, k=2)
    problems_output = "\n\n".join([doc.page_content for doc in problems_results])

    combined_text = f"Manual Results:\n{manual_output}\n\nProblems Results:\n{problems_output}"

    # Generate the summary with GPT-J
    input_text = f"Summarize the following information:\n{combined_text}\n\nSummary:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(
        inputs.input_ids,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, not the prompt
    summary = tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

    return manual_output, problems_output, summary

# Gradio interface
iface = gr.Interface(
    fn=search_and_summarize,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs=[
        gr.Textbox(label="Manual Results"),
        gr.Textbox(label="Issues Results"),
        gr.Textbox(label="Summary by GPT-J")
    ],
    examples=[
        ["How to change the knife?"],
        ["What are the safety precautions for using the machine?"],
        ["How can I get help with the machine?"]
    ],
    title="Manual Querying System with GPT-J Summarization",
    description="Enter a question to get information from the manual and the common issues, summarized by GPT-J."
)

# Launch the Gradio app
iface.launch()