File size: 2,831 Bytes
aaec2db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import re
import warnings

import gradio as gr
import pdfplumber
from langchain import PromptTemplate, LLMChain
from langchain_together import Together

# The Together API key is a secret and must never be committed to source
# control. It is read from the TOGETHER_API_KEY environment variable, which
# should be supplied by the deployment (e.g. a secret configured on the
# hosting platform). The key that used to be hard-coded on this line has been
# exposed and should be revoked and rotated.
if not os.environ.get('TOGETHER_API_KEY'):
    warnings.warn(
        "TOGETHER_API_KEY is not set; requests to the Together API will fail "
        "until it is provided through the environment.",
        RuntimeWarning,
    )

def extract_text_from_pdf(pdf_file, max_pages=16):
    """Return the text of the leading pages of a PDF.

    Args:
        pdf_file: The PDF to read; passed unchanged to ``pdfplumber.open``.
        max_pages: Maximum number of pages to read, counted from the first
            page. ``None`` reads every page; zero or a negative value reads
            none.

    Returns:
        The text of each page that was read, each followed by a newline.
        A page that yields no text contributes only its newline.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for index, page in enumerate(pdf.pages):
            if max_pages is not None and index >= max_pages:
                break
            # extract_text() can yield None for a page without a text layer
            # (e.g. a scanned image, depending on the pdfplumber version);
            # fall back to "" so the concatenation below never fails.
            page_texts.append(page.extract_text() or "")
    return "".join(f"{page_text}\n" for page_text in page_texts)

def _tidy_response(raw_text):
    """Clean a raw model completion so it can be shown to the user.

    Collapses immediately repeated words and drops sentences that repeat an
    earlier one (both compared case-insensitively), normalizes whitespace,
    and makes sure the result ends with sentence punctuation.
    """
    # NOTE(review): this removes the substring "assistant" everywhere,
    # including legitimate uses of the word; presumably it targets a chat-role
    # marker leaking into the completion -- confirm before narrowing it.
    text = raw_text.replace("assistant", "").strip()

    seen_sentences = set()
    kept_sentences = []
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        words = []
        for word in sentence.split():
            # Only drop a word when it directly repeats the previous one;
            # removing every later occurrence would delete ordinary words
            # such as "the" and garble the answer.
            if not words or word.lower() != words[-1].lower():
                words.append(word)
        cleaned = ' '.join(words)
        key = cleaned.lower()
        if cleaned and key not in seen_sentences:
            seen_sentences.add(key)
            kept_sentences.append(cleaned)

    result = ' '.join(kept_sentences)
    if not result:
        # Nothing usable came back; answer with the same fallback phrase the
        # prompt asks the model to use instead of a lone period.
        return "I don't know."
    if not result.endswith(('.', '!', '?')):
        result += '.'
    return result


def Bot(text, question, max_tokens=50):
    """Answer ``question`` using ``text`` as the only context.

    Args:
        text: Context inserted into the prompt (the extracted document text).
        question: The user's question.
        max_tokens: Completion length limit passed to the Together model.

    Returns:
        The cleaned model answer, or a string starting with
        ``"Error in generating response:"`` if building or invoking the chain
        raised an exception.
    """
    chat_template = """
    Based on the provided context: {text}
    Please answer the following question: {Questions}
    Only provide answers that are directly related to the context. If the question is unrelated, respond with "I don't know".
    """
    try:
        # The chain is built inside the try block so that configuration
        # problems (for example a missing API key) are reported the same way
        # as failures of the request itself.
        prompt = PromptTemplate(
            input_variables=['text', 'Questions'],
            template=chat_template
        )
        llama3 = Together(
            model="meta-llama/Llama-3-70b-chat-hf",
            max_tokens=max_tokens,
        )
        chain = LLMChain(llm=llama3, prompt=prompt)
        response = chain.invoke({
            "text": text,
            "Questions": question
        })
        return _tidy_response(response['text'])
    except Exception as e:
        return f"Error in generating response: {e}"

def _is_greeting(message):
    """Return True when ``message`` opens with a greeting word or phrase."""
    greetings = ("hi", "hello", "hey", "greetings", "what's up", "howdy")
    normalized = message.lower().strip()
    for greeting in greetings:
        if not normalized.startswith(greeting):
            continue
        remainder = normalized[len(greeting):]
        # Require a word boundary after the greeting so that questions such
        # as "history of the company?" are not mistaken for "hi".
        if not remainder or not remainder[0].isalnum():
            return True
    return False


def ChatBot(history, document, question):
    """Handle one chat turn and return the updated chat history.

    Args:
        history: Existing chat history as a list of
            ``(user_message, bot_message)`` pairs, or ``None``.
        document: The uploaded PDF (as passed on to
            ``extract_text_from_pdf``), or ``None`` if nothing was uploaded.
        question: The text the user submitted.

    Returns:
        A new list holding the previous history plus one
        ``(user_message, bot_message)`` pair for this turn. The input list is
        not modified. A blank question adds nothing.
    """
    turns = list(history) if history else []
    message = (question or "").strip()
    if not message:
        return turns

    if _is_greeting(message):
        return turns + [(message, "Hello! How can I assist you with the document today?")]

    if not document:
        # Without this guard the PDF reader would be called with no file.
        return turns + [(message, "Please upload a PDF document first.")]

    try:
        text = extract_text_from_pdf(document)
    except Exception as e:
        # Report unreadable documents in the chat, mirroring how Bot reports
        # generation failures, instead of surfacing a raw exception.
        return turns + [(message, f"Error in reading document: {e}")]

    # Each history entry pairs the user's message with the bot's reply, which
    # is the shape the chat display renders as one user/bot exchange.
    return turns + [(message, Bot(text, message))]

# Assemble the web UI: a chat display, a PDF upload field and a question box.
with gr.Blocks() as iface:
    chatbot = gr.Chatbot()
    document = gr.File(
        label="Upload PDF Document",
        type="filepath",
    )
    question = gr.Textbox(
        label="Ask a Question",
        placeholder="Type your question here...",
    )

    def respond(history, document, question):
        """Pass one submitted question to ``ChatBot`` and return its result."""
        updated_history = ChatBot(history, document, question)
        return updated_history

    # Submitting the textbox sends the current chat history, the uploaded file
    # and the question to ``respond``; its return value replaces the chat
    # display contents.
    question.submit(
        fn=respond,
        inputs=[chatbot, document, question],
        outputs=chatbot,
    )

# Start the app with debug mode enabled.
iface.launch(debug=True)