File size: 5,470 Bytes
3dff4cb
e9840df
 
 
 
 
3dff4cb
e9840df
ef9e1ba
 
e9840df
ef9e1ba
e9840df
 
 
6814430
e9840df
 
 
3dff4cb
 
 
 
 
 
 
 
 
e9840df
 
 
 
 
 
 
 
 
 
 
ef9e1ba
6814430
 
 
 
 
e9840df
ef9e1ba
6814430
e9840df
 
 
 
 
 
 
 
 
ef9e1ba
6814430
 
ef9e1ba
 
 
 
 
 
 
 
3dff4cb
6814430
 
ef9e1ba
6814430
e9840df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6814430
 
e9840df
443f232
6814430
 
e9840df
 
6814430
e9840df
be312e0
 
 
6814430
446dbbb
ef9e1ba
571b70a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os 
import gradio as gr

from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Device string passed to the sentence-transformers embedding model
# (used as model_kwargs={"device": DEVICE} in document_loader).
DEVICE = 'cpu'
# Choices shown in the "File Extensions" dropdown; the same list is also
# passed as file_types to the upload widget.
# NOTE(review): 'wav' is listed, but document_loader only has branches for
# 'pdf', 'text', 'csv' and 'word' — confirm whether audio is meant to work.
FILE_EXT = ['pdf','text','csv','word','wav']


def loading_file():
    """Return the placeholder status text shown while a file is loading."""
    status_text = "Loading..."
    return status_text


def get_openai_chat_model(API_key):
    """Build a LangChain OpenAI LLM authenticated with ``API_key``.

    The key is exported through the ``OPENAI_API_KEY`` environment variable
    before the client object is constructed.

    Args:
        API_key: OpenAI API key (string).

    Returns:
        A ``langchain.llms.OpenAI`` instance.

    Raises:
        ImportError: If ``langchain.llms.OpenAI`` cannot be imported.
    """
    try:
        from langchain.llms import OpenAI
    except ImportError as err:
        # Raising a plain string is a TypeError in Python 3, which hid the
        # real cause; raise a proper exception and keep the original error
        # both in the message and as the chained cause.
        raise ImportError(
            "{}, unable to load openAI. Please install openai and set "
            "OPENAI_API_KEY".format(err)
        ) from err
    os.environ["OPENAI_API_KEY"] = API_key
    return OpenAI()

def process_documents(documents,data_chunk=1000,chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        documents: Either a single loaded document or a list/tuple of them.
            The loaders in this file return both shapes, so the input is
            normalised to a list before splitting.
        data_chunk: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the splitter.

    Returns:
        Whatever ``split_documents`` returns for the normalised list.
    """
    # split_documents iterates over its argument, so it must receive a
    # sequence of documents. The previous ``documents[0]`` handed it a single
    # document (or failed on a non-list) and ignored every later document.
    if isinstance(documents, (list, tuple)):
        docs = list(documents)
    else:
        docs = [documents]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(docs)

def get_hugging_face_model(model_id,API_key,temperature=0.1):
    """Create a ``HuggingFaceHub`` LLM for the given repository.

    Args:
        model_id: Passed as ``repo_id``.
        API_key: Passed as ``huggingfacehub_api_token``.
        temperature: Placed in ``model_kwargs`` next to a fixed
            ``max_new_tokens`` of 2048.

    Returns:
        The constructed ``HuggingFaceHub`` object.
    """
    generation_kwargs = {"temperature": temperature, "max_new_tokens": 2048}
    return HuggingFaceHub(
        huggingfacehub_api_token=API_key,
        repo_id=model_id,
        model_kwargs=generation_kwargs,
    )

def chat_application(llm_service,key):
    """Return the LLM object for the selected service.

    Args:
        llm_service: Service name. Any capitalisation of "huggingface"
            selects the Hugging Face Hub model; every other value selects
            OpenAI.
        key: API key/token forwarded to the chosen service's builder.

    Returns:
        The LLM built by ``get_hugging_face_model`` or
        ``get_openai_chat_model``.
    """
    # Compare case-insensitively: the UI dropdown offers 'HuggingFace' while
    # document_loader defaults to 'Huggingface'.
    if str(llm_service).strip().lower() == 'huggingface':
        return get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
    return get_openai_chat_model(API_key=key)


def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
    """Load an uploaded file, index it, and build the global ``qa`` chain.

    Args:
        file_data: A filesystem path, or an uploaded-file object exposing
            the path in its ``name`` attribute.
        api_key: Key/token forwarded to ``chat_application``.
        doc_type: One of 'pdf', 'text', 'csv' or 'word'; any other value is
            reported as a loading error.
        llm: Service name forwarded to ``chat_application``.

    Returns:
        "Ready..." once the module-level ``qa`` chain has been assigned, or
        "Error in loading Documents " when nothing could be loaded.
    """
    global qa

    # No upload yet: report it instead of crashing inside a loader.
    if file_data is None:
        return "Error in loading Documents "

    # The loaders need a path; fall back to the value itself when it is
    # already a plain path string.
    file_path = getattr(file_data, "name", file_data)

    document = None
    if doc_type == 'pdf':
        document = process_pdf_document(document_file_name=file_path)
    elif doc_type == 'text':
        document = process_text_document(document_file_name=file_path)
    elif doc_type == 'csv':
        document = process_csv_document(document_file_name=file_path)
    elif doc_type == 'word':
        document = process_word_document(document_file_name=file_path)

    if not document:
        return "Error in loading Documents "

    # Build the embedding model only after a document has actually loaded,
    # so failed uploads do not pay for model initialisation.
    embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
    texts = process_documents(documents=document)
    vector_db = FAISS.from_documents(documents=texts, embedding=embedding_model)
    qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
                                     chain_type='stuff',
                                     retriever=vector_db.as_retriever(),
                                     return_source_documents=True
                                     )
    return "Ready..."

        
def process_text_document(document_file_name):
    """Read ``document_file_name`` with ``TextLoader`` and return ``load()``'s result."""
    return TextLoader(document_file_name).load()


def process_csv_document(document_file_name):
    """Read ``document_file_name`` with ``CSVLoader`` and return ``load()``'s result."""
    csv_loader = CSVLoader(file_path=document_file_name)
    return csv_loader.load()


def process_word_document(document_file_name):
    """Read ``document_file_name`` with ``UnstructuredWordDocumentLoader`` and return ``load()``'s result."""
    word_loader = UnstructuredWordDocumentLoader(file_path=document_file_name)
    return word_loader.load()


def process_pdf_document(document_file_name):
    """Read ``document_file_name`` with ``PDFMinerLoader`` and return the first loaded item.

    Unlike the other ``process_*_document`` helpers, this returns a single
    element of ``load()``'s result, not the whole list.
    """
    loaded = PDFMinerLoader(document_file_name).load()
    first_document = loaded[0]
    return first_document



# Page-level CSS handed to gr.Blocks: centres the main column and caps its width.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Header HTML rendered at the top of the page. The wording matches the real
# button label and states that an API key is required, because the UI asks
# for one and forwards it to the selected LLM service.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with Data • OpenAI/HuggingFace</h1>
    <p style="text-align: center;">Upload a file from your computer, click the "Load file to langchain" button, <br />
    when everything is ready, you can start asking questions about the data you uploaded ;) <br />
    This version is just for QA retrieval so it will not use chat history. Select Hugging Face or OpenAI as the LLM
    and enter the API key for the selected service</p>
</div>
"""

# Build the Gradio page: LLM/key selection, file upload, status box and the
# (not yet wired) chat widgets.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            with gr.Box():
                # Default mirrors document_loader's default LLM so the
                # handler never receives an empty selection.
                LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='Large Language Model Selection',info='LLM Service',value='HuggingFace')
                # The label is static: formatting the Dropdown component into
                # the text would insert the component object, not the choice.
                API_key = gr.Textbox(label="Add API key for the selected LLM service", type="password")
            with gr.Column():
                with gr.Row():
                    # Default mirrors document_loader's doc_type default.
                    file_extension = gr.Dropdown(FILE_EXT, label="File Extensions", info="Select your files extensions!", value='pdf')
                    pdf_doc = gr.File(label="Upload File to start QA", file_types=FILE_EXT, type="file")
                with gr.Row():
                    load_pdf = gr.Button("Load file to langchain")
                    langchain_status = gr.Textbox(label="Status", placeholder="", interactive=True)

        chatbot = gr.Chatbot()
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_button = gr.Button("Send Message")

    # First handler shows the "Loading..." placeholder in the status box.
    load_pdf.click(loading_file, None, langchain_status, queue=False)
    # Inputs are positional, so they must follow document_loader's signature:
    # (file_data, api_key, doc_type, llm).
    load_pdf.click(document_loader, inputs=[pdf_doc, API_key, file_extension, LLM_option], outputs=[langchain_status], queue=False)

    # TODO: chatbot, question and submit_button are not connected to any
    # handler yet, so the loaded document cannot be queried from the UI.

demo.launch()