import gradio as gr
# import fitz  # PyMuPDF for extracting text from PDFs
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM
import torch
import re
import transformers
from torch import bfloat16
from langchain_community.document_loaders import DirectoryLoader
import subprocess

# Run setup.sh script before starting the app
subprocess.run(["/bin/bash", "setup.sh"], check=True)

# Initialize embeddings and ChromaDB
model_name = "sentence-transformers/all-mpnet-base-v2"
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Load PDFs from the example directory and split them into overlapping chunks
loader = DirectoryLoader('./example', glob="**/*.pdf", recursive=True, use_multithreading=True)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index the chunks in a persistent Chroma store, then open it as a retriever
vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    persist_directory="example_chroma_companies"
)
books_db = Chroma(persist_directory="./example_chroma_companies", embedding_function=embeddings)
books_db_client = books_db.as_retriever()

# Initialize the model and tokenizer
model_name = "stabilityai/stablelm-zephyr-3b"

# 4-bit NF4 quantization so the model fits in limited GPU memory
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline used by LangChain to generate answers
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    torch_dtype=torch.float16,
    device_map=device,
    do_sample=True,     # Enable sampling
    temperature=0.7,    # Sampling temperature (only used when do_sample=True)
    top_p=0.9,
    top_k=50,
    max_new_tokens=256
)

llm = HuggingFacePipeline(pipeline=query_pipeline)

# RetrievalQA chain that "stuffs" the retrieved chunks into the prompt
books_db_client_retriever = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=books_db_client,
    verbose=True
)

# Function to retrieve an answer using the RAG system
def test_rag(query):
    books_retriever = books_db_client_retriever.run(query)

    # Extract the relevant answer using regex
    corrected_text_match = re.search(r"Helpful Answer:(.*)", books_retriever, re.DOTALL)
    if corrected_text_match:
        corrected_text_books = corrected_text_match.group(1).strip()
    else:
        corrected_text_books = "No helpful answer found."
    return corrected_text_books

# Chat callback: answer the query and append the turn to the history
def chat(query, history=None):
    if history is None:
        history = []
    answer = test_rag(query)
    history.append((query, answer))
    return history, history

# Gradio interface
interface = gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(label="Enter your question"), gr.State()],
    outputs=[gr.Chatbot(label="Chat History"), gr.State()],
    live=True
)

interface.launch()