Spaces:

sanjeevbora
/

ChatBot

Paused

App Files Files Community

ChatBot / app.py

sanjeevbora

import spaces

4a30cca verified 6 months ago

raw

history blame

3.87 kB

	# import subprocess
	import os
	# # Run setup.sh script before starting the app
	# subprocess.run(["/bin/bash", "setup.sh"], check=True)
	# Runtime provisioning: each shell command below is executed in order when
	# the module is loaded.  The exit status returned by os.system is not
	# checked, so a failing command does not stop the application from starting.
	for _provision_cmd in (
	    'pip install --upgrade pip',
	    'apt-get update && apt-get install -y libmagic1',
	    'pip install -U langchain-community',
	    'pip install --upgrade accelerate',
	    'pip install -i https://pypi.org/simple/ bitsandbytes --upgrade',
	):
	    os.system(_provision_cmd)
	# Drop the loop variable so the module namespace stays free of helpers.
	del _provision_cmd

	import gradio as gr
	import spaces
	# import fitz # PyMuPDF for extracting text from PDFs
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import Chroma
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.docstore.document import Document
	from langchain.llms import HuggingFacePipeline
	from langchain.chains import RetrievalQA
	from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM
	import torch
	import re
	import transformers
	from torch import bfloat16
	from langchain_community.document_loaders import DirectoryLoader

	# Initialize embeddings and ChromaDB
	# Embedding model used to vectorise the document chunks (and, through the
	# retriever, the incoming queries).  NOTE: model_name is rebound further
	# down to the id of the text-generation model.
	model_name = "sentence-transformers/all-mpnet-base-v2"
	# Run on the GPU when torch can see one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	model_kwargs = {"device": device}
	embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

	# Load every PDF below ./example, including nested sub-directories.
	# The pattern has to be "**/*.pdf": the previous "*/.pdf" only matched files
	# whose name is literally ".pdf", so no document was ever loaded and the
	# vector store stayed empty.
	loader = DirectoryLoader('./example', glob="**/*.pdf", recursive=True, use_multithreading=True)
	docs = loader.load()

	# Split the documents into 1000-character chunks that overlap by 200
	# characters, so text cut at a chunk boundary also appears in the next chunk.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	all_splits = text_splitter.split_documents(docs)

	# Embed the chunks and write them to the on-disk Chroma collection.
	# NOTE(review): this runs on every start; presumably chunks are re-added to
	# an already persisted collection each time - confirm and de-duplicate.
	vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="example_chroma_companies")
	# Re-open the same persisted directory as the store that is actually queried.
	books_db = Chroma(persist_directory="./example_chroma_companies", embedding_function=embeddings)

	# Retriever view of the store; this is what the QA chain is given.
	books_db_client = books_db.as_retriever()

	# Initialize the model and tokenizer
	# NOTE: this rebinds model_name, which previously held the embedding model id.
	model_name = "stabilityai/stablelm-zephyr-3b"

	# 4-bit quantization is currently switched off.  The disabled configuration
	# was:
	#   transformers.BitsAndBytesConfig(
	#       load_in_4bit=True,
	#       bnb_4bit_quant_type='nf4',
	#       bnb_4bit_use_double_quant=True,
	#       bnb_4bit_compute_dtype=torch.bfloat16,
	#   )
	# and it was handed to from_pretrained as quantization_config=bnb_config.

	# Model configuration, with max_new_tokens=1024 set as a config override.
	model_config = AutoConfig.from_pretrained(model_name, max_new_tokens=1024)

	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Load the causal LM onto the device selected earlier ("cuda" or "cpu").
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    config=model_config,
	    trust_remote_code=True,
	    device_map=device,
	)

	# Text-generation pipeline around the already loaded model and tokenizer.
	# NOTE(review): max_new_tokens=256 here differs from the 1024 set on the
	# config above - confirm which limit is meant to apply.
	query_pipeline = pipeline(
	    task="text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    device_map=device,
	    torch_dtype=torch.float16,
	    return_full_text=True,
	    max_new_tokens=256,
	    # Sampling settings (temperature / top_p / top_k only matter because
	    # do_sample is enabled).
	    do_sample=True,
	    temperature=0.7,
	    top_p=0.9,
	    top_k=50,
	)



	# Expose the transformers pipeline to LangChain as an LLM object.
	llm = HuggingFacePipeline(pipeline=query_pipeline)

	# Retrieval-QA chain: answers a question with the LLM, using documents
	# fetched through the Chroma retriever.  verbose=True turns on the chain's
	# verbose output.
	books_db_client_retriever = RetrievalQA.from_chain_type(
	    retriever=books_db_client,
	    chain_type="stuff",
	    llm=llm,
	    verbose=True,
	)

	# Answer a single question through the retrieval-QA chain
	@spaces.GPU()
	def test_rag(query):
	    """Return the answer portion of the QA chain's output for *query*.

	    The chain is run on the query and its raw text output is searched for
	    the first "Helpful Answer:" marker.  Everything after that marker
	    (newlines included) is returned with surrounding whitespace removed.
	    When the marker is absent, the fixed string "No helpful answer found."
	    is returned instead.
	    """
	    chain_output = books_db_client_retriever.run(query)

	    # re.DOTALL lets the capture group span multiple lines of the answer.
	    answer_match = re.search(r"Helpful Answer:(.*)", chain_output, re.DOTALL)

	    if answer_match is None:
	        return "No helpful answer found."
	    return answer_match.group(1).strip()

	# Callback invoked by the Gradio interface for each submitted question
	def chat(query, history=None):
	    """Answer *query* and record the exchange in the chat history.

	    *history* is a list of (question, answer) tuples; a new list is started
	    when it is None.  The list is extended in place and returned twice, once
	    for each of the interface's two outputs.
	    """
	    history = [] if history is None else history
	    history.append((query, test_rag(query)))
	    return history, history

	# Gradio interface
	# Inputs:  the question textbox plus a State holding the running history
	#          (chat() starts a new list when that state is still None).
	# Outputs: the chatbot widget and the updated State, both fed from the
	#          (history, history) tuple that chat() returns.
	interface = gr.Interface(
	    fn=chat,
	    inputs=[gr.Textbox(label="Enter your question"), gr.State()],
	    outputs=[gr.Chatbot(label="Chat History"), gr.State()],
	    # live must stay off: with live=True the interface reruns chat() whenever
	    # the textbox content changes, so every partially typed question would
	    # trigger a full GPU generation and be appended to the chat history.
	    # With live=False the model runs only when the user presses Submit.
	    live=False,
	)

	# Start the web server for the app.
	interface.launch()