Spaces:

Samarth991
/

LLM-Chatbot

Sleeping

File size: 6,777 Bytes

3dff4cb
e9840df
46a768d
2c230be
49dbc00
e9840df
3dff4cb
e9840df
ef9e1ba
 
e9840df
ef9e1ba
e9840df
 
 
6814430
e9840df
 
 
3dff4cb
 
 
 
 
 
 
 
 
688f875
49dbc00
90fc7ac
e9840df
 
 
 
 
688f875
e9840df
 
ef9e1ba
57005dc
6814430
 
57005dc
 
6814430
4d8a5d0
 
 
e9840df
3edae51
e9840df
 
3edae51
e9840df
3edae51
e9840df
3edae51
e9840df
3edae51
31f4dd5
 
 
 
 
6814430
ef9e1ba
31f4dd5
ef9e1ba
31f4dd5
 
 
 
 
 
6814430
 
de8093f
6814430
e9840df
1c52547
 
e9840df
 
 
1c52547
 
e9840df
 
 
 
1c52547
 
e9840df
 
 
 
5a2a128
 
90fc7ac
36b9066
49dbc00
e9840df
46a768d
 
 
 
 
 
 
 
2968e66
bcc7659
46a768d
 
 
 
 
 
 
 
 
 
 
 
 
e9840df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b30343
6814430
2b30343
 
e9840df
2b30343
6814430
2b30343
fdcda98
 
4d8a5d0
fdcda98
bc0dc94
 
46a768d
6814430
4727b07
bc0dc94
2af3209
 
 
 
 
 
46a768d
 
de8093f
 
2af3209
571b70a

import os 
import gradio as gr
import time 
from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader,OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Device handed to the sentence-transformer embedding model (via model_kwargs) in document_loader.
DEVICE = 'cpu'
# Document-type labels offered in the "File Extensions" dropdown and passed as the upload widget's file_types.
# NOTE(review): document_loader only has branches for 'pdf', 'text', 'csv' and 'word'; 'wav' has no loader.
FILE_EXT = ['pdf','text','csv','word','wav']


def loading_file():
    """Return the placeholder status text displayed while a load is in progress."""
    status_message = "Loading..."
    return status_message


def get_openai_chat_model(API_key):
    """Create a LangChain ``OpenAI`` LLM that authenticates with ``API_key``.

    Args:
        API_key: OpenAI API key; exported as the ``OPENAI_API_KEY`` environment
            variable before the LLM is constructed.

    Returns:
        The constructed ``OpenAI`` LLM instance.

    Raises:
        ImportError: if ``langchain.llms.OpenAI`` cannot be imported.
    """
    try:
        from langchain.llms import OpenAI
    except ImportError as err:
        # Raising a bare string is itself a TypeError in Python 3, which hid
        # the real cause; raise a proper exception and fill in the placeholder.
        raise ImportError(
            "{}, unable to load openAI. Please install openai and add OPENAIAPI_KEY".format(err)
        ) from err
    os.environ["OPENAI_API_KEY"] = API_key
    llm = OpenAI()
    return llm

def process_documents(documents,data_chunk=1500,chunk_overlap=100):
    """Split ``documents`` into chunks using a newline-separated character splitter.

    Args:
        documents: documents to split, as accepted by ``split_documents``.
        data_chunk: value passed to the splitter as ``chunk_size``.
        chunk_overlap: value passed to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``CharacterTextSplitter.split_documents``.
    """
    splitter_options = {
        "chunk_size": data_chunk,
        "chunk_overlap": chunk_overlap,
        "separator": '\n',
    }
    return CharacterTextSplitter(**splitter_options).split_documents(documents)

def get_hugging_face_model(model_id,API_key,temperature=0.1):
    """Create a ``HuggingFaceHub`` LLM for the given repository.

    Args:
        model_id: passed to ``HuggingFaceHub`` as ``repo_id``.
        API_key: passed as ``huggingfacehub_api_token``.
        temperature: sampling temperature placed in ``model_kwargs``.

    Returns:
        The constructed ``HuggingFaceHub`` instance.
    """
    generation_kwargs = {"temperature": temperature, "max_new_tokens": 4096}
    return HuggingFaceHub(
        huggingfacehub_api_token=API_key,
        repo_id=model_id,
        model_kwargs=generation_kwargs,
    )

def chat_application(llm_service,key):
    """Return the LLM for the selected service.

    The service name is compared case-insensitively: document_loader's default
    is spelled 'Huggingface' while the UI dropdown uses 'HuggingFace', and an
    exact-case comparison silently routed the former to OpenAI.

    Args:
        llm_service: service name; any casing of 'huggingface' selects the
            Hugging Face Hub model, every other value selects OpenAI.
        key: API key forwarded to the selected service's factory.

    Returns:
        The LLM object built by the matching factory function.
    """
    # `or ""` tolerates None, e.g. when nothing has been picked in the dropdown.
    service = str(llm_service or "").strip().lower()
    if service == 'huggingface':
        return get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
    return get_openai_chat_model(API_key=key)

def summarize_contents():
    """Ask the global ``qa`` chain for a summary of the loaded document.

    Returns:
        The chain's ``"result"`` text, or a short instruction message when no
        document has been processed yet (the ``qa`` global does not exist).
    """
    # `qa` is only created by document_loader; look it up defensively instead
    # of failing with a NameError.
    chain = globals().get("qa")
    if chain is None:
        return "Please upload a file and generate embeddings first."
    question = "Generate a summary of the contents. Do not return the response in json format"
    # The chain is built with return_source_documents=True, so it has two
    # output keys and `.run()` is not supported; call it and pick "result",
    # the same way infer() does.
    return chain({"query": question})["result"]

def document_loader(file_path,api_key,doc_type='pdf',llm='Huggingface'):
    """Load an uploaded file, index it in FAISS and build the global ``qa`` chain.

    Args:
        file_path: uploaded file object; the process_*_document helpers read
            its ``.name`` attribute.
        api_key: API key forwarded to ``chat_application``.
        doc_type: one of 'pdf', 'text', 'csv' or 'word'.
        llm: service name forwarded to ``chat_application``.

    Returns:
        A status string: "Document Processing completed ..." on success,
        otherwise a message starting with "Error in loading Documents".
    """
    global qa

    # Validate cheap preconditions before loading the embedding model.
    if file_path is None:
        return "Error in loading Documents : no file was uploaded"
    supported_types = ('pdf', 'text', 'csv', 'word')
    if doc_type not in supported_types:
        return "Error in loading Documents : unsupported file type {!r}".format(doc_type)

    try:
        loaders = {
            'pdf': process_pdf_document,
            'text': process_text_document,
            'csv': process_csv_document,
            'word': process_word_document,
        }
        document = loaders[doc_type](document_file=file_path)
        print("Documents loaded :", len(document))

        texts = process_documents(documents=document)
        embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
        vector_db = FAISS.from_documents(documents=texts, embedding=embedding_model)

        # Assigned last so a failure leaves any previously built chain intact.
        qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
                                         chain_type='stuff',
                                         retriever=vector_db.as_retriever(),
                                         return_source_documents=True
                                         )
    except Exception as err:
        # Report the cause instead of swallowing it; a bare `except:` would
        # also trap KeyboardInterrupt/SystemExit.
        print("Error in loading Documents :", repr(err))
        return "Error in loading Documents : {}".format(err)

    return "Document Processing completed ..."

        
def process_text_document(document_file):
    """Load the file at ``document_file.name`` with ``TextLoader`` and return the result."""
    text_path = document_file.name
    return TextLoader(text_path).load()

def process_csv_document(document_file):
    """Load the file at ``document_file.name`` with ``CSVLoader`` and return the result."""
    csv_path = document_file.name
    return CSVLoader(file_path=csv_path).load()


def process_word_document(document_file):
    """Load the file at ``document_file.name`` with ``UnstructuredWordDocumentLoader`` and return the result."""
    word_path = document_file.name
    return UnstructuredWordDocumentLoader(file_path=word_path).load()


def process_pdf_document(document_file):
    """Print the file name, then load it with ``PDFMinerLoader`` and return the result."""
    pdf_path = document_file.name
    print("Document File Name :",pdf_path)
    return PDFMinerLoader(pdf_path).load()

def infer(question, history):
    """Answer ``question`` with the global ``qa`` retrieval chain.

    Args:
        question: the user's query text.
        history: chat history; accepted for interface compatibility but not
            used, since the chain is queried with the question only.

    Returns:
        The chain's ``"result"`` text, or a short instruction message when no
        document has been processed yet (the ``qa`` global does not exist).
    """
    # `qa` is only created by document_loader; avoid a NameError when the user
    # asks a question before uploading anything.
    chain = globals().get("qa")
    if chain is None:
        return "Please upload a file and generate embeddings before asking questions."
    result = chain({"query": question})
    return result["result"]

def bot(history):
    """Stream the answer to the latest question into the chat history.

    Args:
        history: list of (question, answer) turns; the last turn holds the
            question that is still unanswered.

    Yields:
        ``history`` after each appended character of the answer, and once for
        an empty answer.
    """
    question = history[-1][0]
    response = infer(question, history)
    # Replace the last turn with a fresh mutable pair: add_text appends a
    # tuple, which would not support item assignment.
    history[-1] = [question, ""]

    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history

    # An empty answer would otherwise yield nothing and leave the turn unset.
    if not response:
        yield history

def add_text(history, text):
    """Append ``text`` as a new unanswered turn and clear the input box.

    Returns:
        A two-item tuple: a new history list ending with ``(text, None)``, and
        an empty string.
    """
    pending_turn = (text, None)
    updated_history = history + [pending_turn]
    cleared_input = ""
    return updated_history, cleared_input


# Page-level CSS: constrains and centres the main column (elem_id "col-container").
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Header HTML rendered at the top of the page. The instructions name the actual
# button label and state that an API key is required, matching the UI below.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with Data • OpenAI/HuggingFace</h1>
    <p style="text-align: center;">Select an LLM service and a file type, add your API key and upload a file from your computer, <br />
    then click the "Upload File &amp; Generate Embeddings" button. <br />
    When the status shows that processing is complete, you can start asking questions about the data you uploaded ;) <br />
    This version is just for QA retrieval, so it will not use chat history.</p>
</div>
"""

# Build the Gradio page: configuration and upload controls on top, chat below.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            with gr.Box():
                # The row is used as a context manager so it actually contains
                # the two selectors; a bare `gr.Row()` call holds nothing.
                with gr.Row():
                    LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='Large Language Model Selection',info='LLM Service')
                    file_extension = gr.Dropdown(FILE_EXT, label="File Extensions", info="Select your files extensions!")
            API_key = gr.Textbox(label="Add API key", type="password")
            with gr.Column():
                with gr.Box():
                    # NOTE(review): Gradio's file_types documentation lists dotted extensions or
                    # category names; confirm the bare labels in FILE_EXT filter as intended.
                    pdf_doc = gr.File(label="Upload File to start QA", file_types=FILE_EXT, type="file")
                    with gr.Row():
                        langchain_status = gr.Textbox(label="Status", placeholder="", interactive = False)
                        load_pdf = gr.Button("Upload File & Generate Embeddings",).style(full_width = False)

    # Show "Loading..." first, THEN process the document. Two independent click
    # handlers writing to the same status box could finish in either order.
    load_pdf.click(loading_file, None, langchain_status, queue=False).then(
        document_loader,
        inputs=[pdf_doc,API_key,file_extension,LLM_option],
        outputs=[langchain_status],
        queue=False,
    )

    with gr.Column():
        with gr.Row():
            chatbot = gr.Chatbot(height=300)
            sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=300)

        with gr.Row():
            question = gr.Textbox(label="Type your question?",lines=1).style(full_width=False)
            submit_btn = gr.Button(value="Send message", variant="secondary", scale = 1)
    # Both Enter and the button append the question, then stream the answer.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(bot, chatbot, chatbot)
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(bot, chatbot, chatbot)

# `bot` is a generator; streaming handlers need the queue to be enabled.
demo.queue()
demo.launch()