Spaces:

herMaster
/

chat-with-a-pdf

Runtime error

App Files Files Community

herMaster committed on Jan 1, 2024

Commit

bbd68c6

1 Parent(s): 1a8b103

update app.py

Browse files

Files changed (1) hide show

app.py +18 -207

app.py CHANGED Viewed

@@ -1,200 +1,20 @@
-# import gradio as gr
-# from qdrant_client import models, QdrantClient
-# from sentence_transformers import SentenceTransformer
-# from PyPDF2 import PdfReader
-# from langchain.text_splitter import RecursiveCharacterTextSplitter
-# from langchain.callbacks.manager import CallbackManager
-# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-# # from langchain.llms import LlamaCpp
-# from langchain.vectorstores import Qdrant
-# from qdrant_client.http import models
-# # from langchain.llms import CTransformers
-# from ctransformers import AutoModelForCausalLM
-# # loading the embedding model -
-# encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
-# print("embedding model loaded.............................")
-# print("####################################################")
-# # loading the LLM
-# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-# print("loading the LLM......................................")
-# # llm = LlamaCpp(
-# #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
-# #     n_ctx=2048,
-# #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
-# #     callback_manager=callback_manager,
-# #     verbose=True,
-# # )
-# llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-#                                            model_file="llama-2-7b-chat.Q8_0.gguf",
-#                                            model_type="llama",
-#                                           #  config = ctransformers.hub.AutoConfig,
-#                                            # hf = True
-#                                            temperature = 0.2,
-#                                            max_new_tokens = 1024,
-#                                            stop = ['\n']
-#                                            )
-# print("LLM loaded........................................")
-# print("################################################################")
-# def get_chunks(text):
-#     text_splitter = RecursiveCharacterTextSplitter(
-#         # seperator = "\n",
-#         chunk_size = 500,
-#         chunk_overlap = 100,
-#         length_function = len,
-#     )
-#     chunks = text_splitter.split_text(text)
-#     return chunks
-# pdf_path = './100 Weird Facts About the Human Body.pdf'
-# reader = PdfReader(pdf_path)
-# text = ""
-# num_of_pages = len(reader.pages)
-# for page in range(num_of_pages):
-#     current_page = reader.pages[page]
-#     text += current_page.extract_text()
-# chunks = get_chunks(text)
-# print("Chunks are ready.....................................")
-# print("######################################################")
-# qdrant = QdrantClient(path = "./db")
-# print("db  created................................................")
-# print("#####################################################################")
-# qdrant.recreate_collection(
-#     collection_name="my_facts",
-#     vectors_config=models.VectorParams(
-#         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
-#         distance=models.Distance.COSINE,
-#     ),
-# )
-# print("Collection created........................................")
-# print("#########################################################")
-# li = []
-# for i in range(len(chunks)):
-#     li.append(i)
-# dic = zip(li, chunks)
-# dic= dict(dic)
-# qdrant.upload_records(
-#     collection_name="my_facts",
-#     records=[
-#         models.Record(
-#             id=idx,
-#             vector=encoder.encode(dic[idx]).tolist(),
-#             payload= {dic[idx][:5] : dic[idx]}
-#         ) for idx in dic.keys()
-#     ],
-# )
-# print("Records uploaded........................................")
-# print("###########################################################")
-# def chat(question):
-#     # question = input("ask question from pdf.....")
-#     hits = qdrant.search(
-#         collection_name="my_facts",
-#         query_vector=encoder.encode(question).tolist(),
-#         limit=3
-#     )
-#     context = []
-#     for hit in hits:
-#       context.append(list(hit.payload.values())[0])
-#     context = context[0] + context[1] + context[2]
-#     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
-#     Read the given context before answering questions and think step by step. If you can not answer a user question based on
-#     the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
-#     B_INST, E_INST = "[INST]", "[/INST]"
-#     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
-#     SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
-#     instruction = f"""
-#     Context: {context}
-#     User: {question}"""
-#     prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
-#     result = llm(prompt_template)
-#     return result
-# gr.Interface(
-#     fn = chat,
-#     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
-#     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
-#     title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
-#     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
-#     theme="soft",
-#     examples=["Hello", "what is the speed of human nerve impulses?"],
-#     # cache_examples=True,
-# ).launch()
 import gradio as gr
-from threading import Thread
-from queue import SimpleQueue
-from typing import Any, Dict, List, Union
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.schema import LLMResult
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from qdrant_client.models import PointStruct
-import os
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-# from qdrant_client import QdrantClient
-# from langchain import VectorDBQA - This is obsolete
-from langchain.chains import RetrievalQA
-from langchain.llms import LlamaCpp
-# from PyPDF2 import PdfReader
 from langchain.vectorstores import Qdrant
-# from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceBgeEmbeddings
-from transformers import AutoModel
 from qdrant_client.http import models
-# from sentence_transformers import SentenceTransformer
-from langchain.prompts import PromptTemplate
 from ctransformers import AutoModelForCausalLM
 # loading the embedding model -
-encoder = SentenceTransformer("all-MiniLM-L6-v2")
 print("embedding model loaded.............................")
 print("####################################################")
@@ -206,9 +26,7 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 print("loading the LLM......................................")
 # llm = LlamaCpp(
-#     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
-#     # n_gpu_layers=n_gpu_layers,
-#     # n_batch=n_batch,
 #     n_ctx=2048,
 #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
 #     callback_manager=callback_manager,
@@ -216,16 +34,17 @@ print("loading the LLM......................................")
 # )
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-                                           model_file="llama-2-7b-chat.Q3_K_S.gguf",
                                            model_type="llama",
                                           #  config = ctransformers.hub.AutoConfig,
                                            # hf = True
-                                           temperature = 0.2,
                                            # max_new_tokens = 1024,
                                            # stop = ['\n']
                                            )
 print("LLM loaded........................................")
 print("################################################################")
@@ -257,11 +76,11 @@ print(chunks)
 print("Chunks are ready.....................................")
 print("######################################################")
-qdrant = QdrantClient(path = "./db")
 print("db  created................................................")
 print("#####################################################################")
-qdrant.recreate_collection(
     collection_name="my_facts",
     vectors_config=models.VectorParams(
         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
@@ -273,23 +92,21 @@ print("Collection created........................................")
 print("#########################################################")
-# starting a list of same size as chunks
 li = []
 for i in range(len(chunks)):
     li.append(i)
-# concatenating li and chunks to create a dictionary
 dic = zip(li, chunks)
 dic= dict(dic)
-qdrant.upload_records(
     collection_name="my_facts",
     records=[
         models.Record(
             id=idx,
             vector=encoder.encode(dic[idx]).tolist(),
             payload= {dic[idx][:5] : dic[idx]}
-## payload is always supposed to be a dictionary with both keys and values as strings. To do this, I used the first 5 chars of
-## every value as the key to make the payload.
         ) for idx in dic.keys()
     ],
 )
@@ -298,21 +115,16 @@ print("Records uploaded........................................")
 print("###########################################################")
 def chat(question):
-    # question = input("ask question from pdf.....")
-    hits = qdrant.search(
         collection_name="my_facts",
         query_vector=encoder.encode(question).tolist(),
         limit=3
     )
     context = []
     for hit in hits:
-    #   print(hit.payload, "score:", hit.score)
       context.append(list(hit.payload.values())[0])
-    #   context += str(hit.payload[hit.payload.values()[:5]])
-    # print("##################################################################")
     context = context[0] + context[1] + context[2]
     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
@@ -335,7 +147,7 @@ def chat(question):
     result = llm(prompt_template)
     return result
-gr.Interface(
     fn = chat,
     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
@@ -343,7 +155,6 @@ gr.Interface(
     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
     theme="soft",
     examples=["Hello", "what is the speed of human nerve impulses?"],
-    # cache_examples=True,
-).launch()

 import gradio as gr
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
+# from langchain.llms import CTransformers
 from ctransformers import AutoModelForCausalLM
 # loading the embedding model -
+encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
 print("embedding model loaded.............................")
 print("####################################################")
 print("loading the LLM......................................")
 # llm = LlamaCpp(
+#     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
 #     n_ctx=2048,
 #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
 #     callback_manager=callback_manager,
 # )
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q8_0.gguf",
                                            model_type="llama",
                                           #  config = ctransformers.hub.AutoConfig,
                                            # hf = True
+                                           # temperature = 0.2,
                                            # max_new_tokens = 1024,
                                            # stop = ['\n']
                                            )
 print("LLM loaded........................................")
 print("################################################################")
 print("Chunks are ready.....................................")
 print("######################################################")
+client = QdrantClient(path = "./db")
 print("db  created................................................")
 print("#####################################################################")
+client.recreate_collection(
     collection_name="my_facts",
     vectors_config=models.VectorParams(
         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
 print("#########################################################")
 li = []
 for i in range(len(chunks)):
     li.append(i)
 dic = zip(li, chunks)
 dic= dict(dic)
+client.upload_records(
     collection_name="my_facts",
     records=[
         models.Record(
             id=idx,
             vector=encoder.encode(dic[idx]).tolist(),
             payload= {dic[idx][:5] : dic[idx]}
         ) for idx in dic.keys()
     ],
 )
 print("###########################################################")
 def chat(question):
+    hits = client.search(
         collection_name="my_facts",
         query_vector=encoder.encode(question).tolist(),
         limit=3
     )
     context = []
     for hit in hits:
       context.append(list(hit.payload.values())[0])
     context = context[0] + context[1] + context[2]
     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
     result = llm(prompt_template)
     return result
+screen = gr.Interface(
     fn = chat,
     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
     theme="soft",
     examples=["Hello", "what is the speed of human nerve impulses?"],
+)
+screen.launch()