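"""Q&A over a PDF via local retrieval-augmented generation.

Pipeline: extract text from the PDF with PyPDF2, chunk it with LangChain's
RecursiveCharacterTextSplitter, embed each chunk with sentence-transformers,
index the vectors in a local Qdrant collection, retrieve the top-3 chunks for
every question, and answer with a quantized Llama-2-7B-Chat model (GGUF, via
ctransformers) behind a Gradio interface.
"""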
import gradio as gr
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from ctransformers import AutoModelForCausalLM

# loading the embedding model
encoder = SentenceTransformer("all-MiniLM-L6-v2")
print("embedding model loaded.............................")
print("####################################################")

# loading the LLM
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
print("loading the LLM......................................")

# Alternative backend via llama-cpp-python, kept for reference:
# from langchain.llms import LlamaCpp
# llm = LlamaCpp(
#     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
#     n_ctx=2048,
#     f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
#     callback_manager=callback_manager,
#     verbose=True,
# )

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q3_K_S.gguf",
    model_type="llama",
    temperature=0.2,
    # max_new_tokens=1024,
    # stop=["\n"],
)
print("LLM loaded........................................")
print("################################################################")
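# A minimal sketch of an alternative, not used below: LangChain's CTransformers
# wrapper can expose the same GGUF model as a LangChain LLM, so it could plug
# into chains such as RetrievalQA. The config values mirror the settings above
# and are illustrative assumptions, not tuned values.
# from langchain.llms import CTransformers
# llm = CTransformers(
#     model="TheBloke/Llama-2-7B-Chat-GGUF",
#     model_file="llama-2-7b-chat.Q3_K_S.gguf",
#     model_type="llama",
#     config={"temperature": 0.2, "max_new_tokens": 1024},
# )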
def get_chunks(text):
    # split the extracted PDF text into overlapping chunks for embedding
    text_splitter = RecursiveCharacterTextSplitter(
        # separator = "\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks


pdf_path = './100 Weird Facts About the Human Body.pdf'
reader = PdfReader(pdf_path)

text = ""
for page in reader.pages:
    text += page.extract_text()

chunks = get_chunks(text)
print(chunks)
print("Chunks are ready.....................................")
print("######################################################")

qdrant = QdrantClient(path="./db")
print("db created................................................")
print("#####################################################################")

qdrant.recreate_collection(
    collection_name="my_facts",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # vector size is defined by the embedding model
        distance=models.Distance.COSINE,
    ),
)
print("Collection created........................................")
print("#########################################################")

# index every chunk by its position in the list
dic = dict(enumerate(chunks))

qdrant.upload_records(
    collection_name="my_facts",
    records=[
        models.Record(
            id=idx,
            vector=encoder.encode(dic[idx]).tolist(),
            # payload keys and values are kept as strings here; the first 5
            # characters of each chunk serve as the key for that chunk
            payload={dic[idx][:5]: dic[idx]},
        )
        for idx in dic.keys()
    ],
)
print("Records uploaded........................................")
print("###########################################################")


def chat(question):
    hits = qdrant.search(
        collection_name="my_facts",
        query_vector=encoder.encode(question).tolist(),
        limit=3,
    )
    # concatenate the retrieved chunks into a single context string
    context = "".join(list(hit.payload.values())[0] for hit in hits)

    system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you cannot answer a user question based on
the provided context, inform the user. Do not use any other information for answering the user. Provide a detailed answer to the question."""

    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
    instruction = f"""
    Context: {context}
    User: {question}"""

    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    result = llm(prompt_template)
    return result


gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=10, placeholder="Enter your question here 👉"),
    outputs=gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀"),
    title="Q&A with PDF 👩🏻‍💻📓✍🏻💡",
    description="This app facilitates a conversation with the PDF available at https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf 💡",
    theme="soft",
    examples=["Hello", "what is the speed of human nerve impulses?"],
    # cache_examples=True,
).launch()
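# Quick smoke test without the UI (comment out the launch() call above first):
# print(chat("what is the speed of human nerve impulses?"))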