File size: 6,283 Bytes
3bb6107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73338b2
3bb6107
 
1f23632
3bb6107
 
 
1f23632
3bb6107
1f23632
73338b2
1f23632
73338b2
1f23632
 
 
 
 
 
3bb6107
1f23632
 
 
 
 
 
 
 
 
 
 
 
3bb6107
1f23632
 
 
 
 
 
 
 
 
 
75f200e
b11ca01
3bb6107
75f200e
3bb6107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e68d28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# import gradio as gr
# import pandas as pd
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.vectorstores import Chroma, faiss
# from langchain_community.llms import HuggingFaceEndpoint, HuggingFaceHub
# from langchain.chains import LLMChain
# from langchain_community.document_loaders.csv_loader import CSVLoader
# from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain_community import vectorstores
# from langchain.prompts import PromptTemplate
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain.chains import ConversationalRetrievalChain
# from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
# from langchain.vectorstores import DocArrayInMemorySearch
# from langchain.document_loaders import TextLoader
# from langchain.chains import RetrievalQA, ConversationalRetrievalChain
# from langchain.memory import ConversationBufferMemory
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import TextLoader
# from langchain.document_loaders import PyPDFLoader
# import panel as pn
# import param
# import re
# import os

# api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

# model = HuggingFaceHub(
#     huggingfacehub_api_token=api_token,
#     repo_id="mistralai/Mistral-7B-Instruct-v0.2",
#     task="conversational",
#     model_kwargs={"temperature": 0.8, "max_length": 1000},
# )
# template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
# {context}
# Question: {question}
# Helpful Answer:"""
# QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# def load_db(file, k):
#     # load documents
#     loader = PyPDFLoader(file)
#     documents = loader.load()
#     # split documents
#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
#     docs = text_splitter.split_documents(documents)
#     # define embedding
#     embeddings = HuggingFaceEmbeddings()
#     # create vector database from data
#     db = vectorstores.FAISS.from_documents(docs, embeddings)
#     # define retriever
#     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
#     # create a chatbot chain. Memory is managed externally.
#     question_generator_chain = LLMChain(llm=model, prompt=QA_CHAIN_PROMPT)

#     qa = ConversationalRetrievalChain.from_llm(
#         llm=model,
#         chain_type="stuff",
#         retriever=retriever,
#         return_source_documents=True,
#         return_generated_question=True,
#     )

#     return qa

# chat_history = []  # initialize chat history

# def greet(question, pdf_file):
#     global chat_history
#     a = load_db(pdf_file, 3)
#     r = a.invoke({"question": question, "chat_history": chat_history})
#     match = re.search(r'Helpful Answer:(.*)', r['answer'])
#     if match:
#         helpful_answer = match.group(1).strip()
#         # Extend chat history with the current question and answer
#         chat_history.extend([(question, helpful_answer)])
#         return helpful_answer
#     else:
#         return "No helpful answer found."

# iface = gr.Interface(fn=greet, inputs=["text", "file"], outputs="text")
# iface.launch(share=True)



import gradio as gr
import os
import re
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain_community.llms import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate

# Hugging Face Hub API token read from the environment; `os.environ.get`
# yields None when HUGGINGFACEHUB_API_TOKEN is not set.
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

# Hosted LLM handle used as the `llm` of the chain built in load_db.
model = HuggingFaceHub(
    huggingfacehub_api_token=api_token,
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="conversational",
    model_kwargs={"temperature": 0.8, "max_length": 1000},
)

# Question-answering prompt with {context} and {question} placeholders. It ends
# with the "Helpful Answer:" marker that greet searches for in the chain output.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# NOTE(review): QA_CHAIN_PROMPT is not referenced by load_db or greet in this
# file, so this template does not appear to reach the chain - confirm whether
# it was meant to be passed to ConversationalRetrievalChain.from_llm.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

def load_db(file, k):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks
    (1000 characters with 150 overlap), embedded with
    ``HuggingFaceEmbeddings`` and indexed with FAISS. The resulting chain
    answers questions with the module-level ``model``, using the ``k`` most
    similar chunks as context.

    Args:
        file: Location of the PDF; passed unchanged to ``PyPDFLoader``.
        k: Number of chunks the similarity retriever returns per query.

    Returns:
        A ``ConversationalRetrievalChain`` configured to also return the
        source documents and the generated question alongside the answer.

    Raises:
        ValueError: If ``file`` is ``None`` or empty, i.e. no PDF was given.
    """
    # Fail fast with an explicit message instead of forwarding a missing
    # upload to the PDF loader.
    if not file:
        raise ValueError("load_db requires a PDF file, but none was provided.")

    # Load the PDF and cut it into overlapping chunks for embedding.
    documents = PyPDFLoader(file).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)

    # Embed the chunks and expose the index as a top-k similarity retriever.
    embeddings = HuggingFaceEmbeddings()
    db = faiss.FAISS.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # Chat history is supplied by the caller on each invocation; the chain
    # itself is created without a memory object.
    return ConversationalRetrievalChain.from_llm(
        llm=model,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )

# Module-level conversation memory shared by every greet() call: a list of
# (question, answer) tuples that greet appends to and passes to the chain
# as "chat_history".
chat_history = []  # initialize chat history

def greet(conversation):
    """Answer one user message about the uploaded PDF.

    Args:
        conversation: Mapping holding the user's message under ``'user'``
            and, optionally, a sequence of uploaded files under ``'file'``
            (only the first entry is used).

    Returns:
        A dict with ``'system'``, ``'user'`` and ``'assistant'`` keys.
        ``'assistant'`` holds the extracted answer, a fallback text when no
        answer marker is found in the chain output, or a request to upload
        a PDF when no file was supplied. When the user message is missing
        or empty, all three values are empty strings.
    """
    # Nothing to answer: return the empty reply before touching the file so
    # an empty message can never fail on a missing or empty upload list.
    user_input = conversation.get('user')
    if not user_input:
        return {"system": "", "user": "", "assistant": ""}

    # A missing key, None, or an empty list all mean "no PDF uploaded".
    uploads = conversation.get('file')
    pdf_file = uploads[0] if uploads else None
    if not pdf_file:
        return {
            "system": "",
            "user": user_input,
            "assistant": "Please upload a PDF file first.",
        }

    # NOTE(review): the chain (including the embedded index) is rebuilt on
    # every message; consider caching it per uploaded file.
    qa_chain = load_db(pdf_file, 3)
    result = qa_chain.invoke({"question": user_input, "chat_history": chat_history})

    # The chain output is expected to contain a "Helpful Answer:" marker.
    # NOTE(review): `.` does not match newlines here, so only the rest of
    # the marker's line is captured - confirm that multi-line answers are
    # not expected, or add re.DOTALL.
    match = re.search(r'Helpful Answer:(.*)', result['answer'])
    if match is None:
        return {"system": "", "user": user_input, "assistant": "No helpful answer found."}

    helpful_answer = match.group(1).strip()
    # Record the exchange so follow-up questions carry conversational context.
    chat_history.append((user_input, helpful_answer))
    return {"system": "", "user": user_input, "assistant": helpful_answer}

# Wire greet into a Gradio interface and start serving it at import time
# (share=True requests a shareable link from Gradio).
# NOTE(review): `gr.Chat` is not a component name I can find in Gradio's
# documented API (the documented chat pieces are `gr.Chatbot` and
# `gr.ChatInterface`) - confirm this attribute resolves, and that the chosen
# input component really delivers the {'user': ..., 'file': [...]} mapping
# that greet reads.
iface = gr.Interface(fn=greet, inputs=gr.Chat(), outputs=gr.Chat())
iface.launch(share=True)