import os

import gradio as gr
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFaceEmbeddings
from rerankers import Reranker

# Hugging Face access token (only needed for gated/private models).
# Guard against writing None into os.environ when the variable is unset,
# which would raise a TypeError.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load the PDF and split it into overlapping chunks.
loader = PyPDFLoader("80dias.pdf")
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
splits = splitter.split_documents(documents)

# Build the vector store with multilingual sentence embeddings.
embedding_model = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
vectordb = Chroma.from_documents(splits, embedding=embeddings)

# LLM hosted on Hugging Face (use one available on Spaces).
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 500},
)

# ColBERT reranker used to pick the best retrieved chunk.
ranker = Reranker("answerdotai/answerai-colbert-small-v1", model_type="colbert")

# RAG prompt and chain, built once at startup instead of on every request.
prompt = hub.pull("rlm/rag-prompt")
rag_chain = prompt | llm | StrOutputParser()


def rag_chat(message, history):
    """Retrieve, rerank, and answer. gr.ChatInterface calls fn(message, history)."""
    # Chroma returns (document, score) pairs where the score is a distance:
    # lower means more similar, so keep only reasonably close chunks.
    results = vectordb.similarity_search_with_score(message)
    context = [doc.page_content for doc, score in results if score < 7]
    if not context:
        return "I have no information to answer that question."
    # Rerank the candidate chunks and keep the single best one.
    ranking = ranker.rank(query=message, docs=context)
    best_context = ranking.top_k(1)[0].text
    return rag_chain.invoke({"context": best_context, "question": message})


# Gradio chat UI
iface = gr.ChatInterface(
    fn=rag_chat,
    title="Chat Julio Verne - RAG",
    description="Ask anything about *La vuelta al mundo en 80 días* by Julio Verne.",
)
iface.launch()
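
# --- Dependency sketch ---
# The source ships no requirements file, so the list below is an assumption
# inferred from the imports above; exact package names and extras may vary
# with your LangChain version. A requirements.txt along these lines should
# cover this script:
#
#   gradio
#   langchain
#   langchain-community
#   langchain-huggingface
#   langchainhub
#   chromadb          # backend for the Chroma vector store
#   pypdf             # backend for PyPDFLoader
#   sentence-transformers
#   rerankers[transformers]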