import yaml
import fitz
import torch
import gradio as gr
from PIL import Image
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import spaces
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter


class PDFChatBot:
    def __init__(self, config_path="config.yaml"):
        """
        Initialize the PDFChatBot instance.

        Parameters:
            config_path (str): Path to the configuration file (default is "config.yaml").
        """
        self.processed = False
        self.page = 0
        self.chat_history = []
        # Initialize other attributes to None
        self.prompt = None
        self.documents = None
        self.embeddings = None
        self.vectordb = None
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.chain = None
        self.chunk_size = 512
        self.overlap_percentage = 50
        self.max_chunks_in_context = 2
        self.current_context = None
        self.model_temperature = 0.5
        self.format_separator = "\n\n--\n\n"
        self.pipe = None

    def load_embeddings(self):
        """Load the sentence-transformers embedding model used to index the PDF."""
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        print("Embedding model loaded")

    def load_vectordb(self):
        """Split the loaded documents into overlapping chunks and index them in Chroma."""
        # Convert the overlap percentage into an absolute character count.
        overlap = int((self.overlap_percentage / 100) * self.chunk_size)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=overlap,
            length_function=len,
            add_start_index=True,
        )
        docs = text_splitter.split_documents(self.documents)
        self.vectordb = Chroma.from_documents(docs, self.embeddings)
        print("Vector store created")

    @spaces.GPU
    def load_tokenizer(self):
        """Load the tokenizer for meta-llama/Meta-Llama-3-8B-Instruct."""
        self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

    @spaces.GPU
    def create_organic_pipeline(self):
        """Create the text-generation pipeline backed by Llama 3 8B Instruct on the GPU."""
        self.pipe = pipeline(
            "text-generation",
            model="meta-llama/Meta-Llama-3-8B-Instruct",
            model_kwargs={"torch_dtype": torch.bfloat16},
            device="cuda",
        )
        print("Model pipeline loaded")

    def get_organic_context(self, query):
        """Retrieve the top-k most relevant chunks for the query and join them into one context string."""
        documents = self.vectordb.similarity_search_with_relevance_scores(query, k=self.max_chunks_in_context)
        context = self.format_separator.join([doc.page_content for doc, score in documents])
        self.current_context = context
        print("Context Ready")
        print(self.current_context)

    @spaces.GPU
    def create_organic_response(self, history, query):
        """Generate an answer to the query, grounding the prompt in the retrieved PDF context."""
        self.get_organic_context(query)
        messages = [
            {"role": "system", "content": "From the the contained given below, answer the question of user \n " + self.current_context},
            {"role": "user", "content": query},
        ]

        prompt = self.pipe.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # Use the temperature configured from the UI sliders instead of a hard-coded value.
        outputs = self.pipe(
            prompt,
            max_new_tokens=1024,
            do_sample=True,
            temperature=self.model_temperature,
            top_p=0.9,
        )
        print(outputs)
        return outputs[0]["generated_text"][len(prompt):]


    def process_file(self, file):
        """
        Process the uploaded PDF file and initialize the embeddings, vector store, and LLM pipeline.

        Parameters:
            file (FileStorage): The uploaded PDF file.
        """
        self.documents = PyPDFLoader(file.name).load()
        self.load_embeddings()
        self.load_vectordb()
        self.create_organic_pipeline()
        #self.create_chain()

    @spaces.GPU
    def generate_response(self, history, query, file, chunk_size, chunk_overlap_percentage, model_temperature, max_chunks_in_context):
        """Answer a user query about the uploaded PDF, processing the file on first use."""
        # Pick up the latest slider values from the UI.
        self.chunk_size = chunk_size
        self.overlap_percentage = chunk_overlap_percentage
        self.model_temperature = model_temperature
        self.max_chunks_in_context = max_chunks_in_context

        if not query:
            raise gr.Error(message='Submit a question')
        if not file:
            raise gr.Error(message='Upload a PDF')
        if not self.processed:
            self.process_file(file)
            self.processed = True

        # Gradio passes the chat history as a list of [user, bot] pairs; append the
        # generated answer to the bot side of the latest pair.
        result = self.create_organic_response(history="", query=query)
        history[-1][-1] += result
        return history, ""

    def render_file(self, file, chunk_size, chunk_overlap_percentage, model_temperature, max_chunks_in_context):
        """Render the current page of the uploaded PDF as an image and cache the UI settings."""
        doc = fitz.open(file.name)
        page = doc[self.page]
        self.chunk_size = chunk_size
        self.overlap_percentage = chunk_overlap_percentage
        self.model_temperature = model_temperature
        self.max_chunks_in_context = max_chunks_in_context
        # Render at 300 DPI (PyMuPDF's default resolution is 72 DPI).
        pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
        image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
        return image

    def add_text(self, history, text):
        """
        Add user-entered text to the chat history.
        Parameters:
            history (list): List of [user, bot] chat pairs.
            text (str): User-entered text.
        Returns:
            list: Updated chat history.
        """
        if not text:
            raise gr.Error('Enter text')
        # Append a mutable [user, bot] pair; the bot slot is filled in generate_response.
        history.append([text, ''])
        return history
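

# --- Usage sketch (not part of the original file; assumptions noted below) ---
# A minimal example of how this class might be wired into a Gradio Blocks UI.
# The component names, slider ranges, and event wiring here are illustrative
# guesses; the project's actual app script is not shown in this file.
if __name__ == "__main__":
    pdf_bot = PDFChatBot()

    with gr.Blocks() as demo:
        with gr.Row():
            chat_history = gr.Chatbot(label="Chat")
            pdf_image = gr.Image(label="PDF preview")
        with gr.Row():
            question = gr.Textbox(label="Ask a question about the PDF")
            uploaded_pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
        chunk_size = gr.Slider(128, 2048, value=512, step=16, label="Chunk size")
        overlap_pct = gr.Slider(0, 90, value=50, step=5, label="Chunk overlap (%)")
        temperature = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Temperature")
        max_chunks = gr.Slider(1, 8, value=2, step=1, label="Max chunks in context")

        # Show the first page of the PDF as soon as it is uploaded.
        uploaded_pdf.upload(
            pdf_bot.render_file,
            inputs=[uploaded_pdf, chunk_size, overlap_pct, temperature, max_chunks],
            outputs=[pdf_image],
        )
        # Add the question to the history, then generate the grounded answer.
        question.submit(
            pdf_bot.add_text,
            inputs=[chat_history, question],
            outputs=[chat_history],
        ).then(
            pdf_bot.generate_response,
            inputs=[chat_history, question, uploaded_pdf,
                    chunk_size, overlap_pct, temperature, max_chunks],
            outputs=[chat_history, question],
        )

    demo.launch()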