File size: 4,837 Bytes
c0ece10 17b3855 c0ece10 17b3855 c0ece10 5d9fd64 5393bbb 5d9fd64 9aba39a c0ece10 5393bbb c0ece10 5393bbb c0ece10 5d9fd64 5393bbb c0ece10 5393bbb c0ece10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import streamlit as st
import openai
import fitz # PyMuPDF
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from io import BytesIO
# Function to extract text from the uploaded PDF file
def extract_pdf_text(pdf_file):
    """Extract the full plain text of an uploaded PDF.

    Args:
        pdf_file: A binary file-like object (e.g. Streamlit's UploadedFile)
            positioned at the start of a PDF document.

    Returns:
        str: The text of every page, concatenated in page order.
    """
    # Open from an in-memory stream. `with` guarantees the document handle
    # is released even if extraction raises — the original never closed it.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        # join() avoids quadratic `+=` string concatenation over many pages.
        return "".join(page.get_text("text") for page in doc)
# Function to get embeddings for the text
def get_embeddings(texts):
    """Embed a batch of strings with the OpenAI embeddings endpoint.

    Args:
        texts: List of strings to embed.

    Returns:
        list: One embedding vector (list of floats) per input string,
        in the same order as `texts`.
    """
    result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    # The API returns vectors under data[i]['embedding'], ordered by input.
    return [item['embedding'] for item in result['data']]
# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3, chunk_size=1500):
    """Return the document chunks most semantically similar to a query.

    The document is split into fixed-size character chunks; the chunks and
    the query are embedded, and the `num_contexts` chunks with the highest
    cosine similarity to the query are joined into one context string.

    Args:
        pdf_text: Full text of the document.
        query: The user's question.
        num_contexts: How many top-matching chunks to include (default 3).
        chunk_size: Characters per chunk (default 1500, matching the
            previous hard-coded value).

    Returns:
        str: The selected chunks joined with spaces, best match first.
    """
    # Fixed-width character windows; the final chunk may be shorter.
    pdf_text_chunks = [pdf_text[i:i + chunk_size]
                       for i in range(0, len(pdf_text), chunk_size)]
    # Embed the document chunks and the query in the same vector space.
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]
    # Shape (1, n_chunks): similarity of the query against every chunk.
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    # argsort is ascending — take the last `num_contexts` indices and
    # reverse so the most similar chunk comes first. Slicing is safe even
    # when there are fewer chunks than `num_contexts`.
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    return " ".join(pdf_text_chunks[i] for i in top_indices)
# Function to generate a response from GPT-4 chat model
def generate_response(context, question, conversation_history):
    """Answer a question with the chat model, grounded in `context`.

    Args:
        context: Retrieved document text to ground the answer in.
        question: The user's question.
        conversation_history: Mutable list of prior chat messages
            (dicts with "role"/"content"); updated in place.

    Returns:
        tuple[str, list]: The assistant's answer and the updated history.
    """
    user_message = {"role": "user",
                    "content": f"Context: {context}\nQuestion: {question}"}
    messages = conversation_history + [user_message]
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # Use the GPT-4 chat model
        messages=messages,
        max_tokens=1200,
        temperature=0.7,
    )
    answer = response['choices'][0]['message']['content'].strip()
    # Bug fix: record BOTH sides of the turn. The original appended only
    # the assistant reply, so later calls sent a history of assistant
    # messages with no matching user turns — a malformed conversation.
    conversation_history.append(user_message)
    conversation_history.append({"role": "assistant", "content": answer})
    return answer, conversation_history
# Function to handle irrelevant questions
def is_irrelevant_question(question):
    """Heuristically flag questions that are off-topic for the paper.

    A question counts as irrelevant when it contains any of a small set
    of philosophical/personal keywords (case-insensitive substring match).

    Args:
        question: Raw question text.

    Returns:
        bool: True if an off-topic keyword appears in the question.
    """
    off_topic = ("life", "love", "meaning", "future", "philosophy")
    lowered = question.lower()
    for word in off_topic:
        if word in lowered:
            return True
    return False
# Streamlit UI
def main():
    """Streamlit entry point: collect an API key, a PDF, and a question,
    then answer using retrieval over the PDF plus the chat model.

    Render order matters: each widget call below both draws the UI and
    returns the user's current input for this rerun.
    """
    st.title("GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")
    # User input: OpenAI API key (masked); set globally on the openai module.
    openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")
    if openai_api_key:
        openai.api_key = openai_api_key
        st.success("API Key successfully set!")
        # Upload the PDF file (text is re-extracted on every rerun).
        pdf_file = st.file_uploader("Upload GPT-4 Research Paper PDF", type="pdf")
        if pdf_file is not None:
            # Extract text from the uploaded PDF
            pdf_text = extract_pdf_text(pdf_file)
            st.write("PDF content loaded successfully!")
            # Initialize conversation history (persists across reruns
            # via Streamlit's session state).
            if 'conversation_history' not in st.session_state:
                st.session_state.conversation_history = []
            # User input: the question they want to ask
            question = st.text_input("Ask your question:")
            if question:
                # Keyword-based guard: refuse clearly off-topic questions
                # before spending an API call.
                if is_irrelevant_question(question):
                    st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
                else:
                    # Get the most relevant context from the document
                    relevant_context = get_relevant_context(pdf_text, question)
                    # Generate the response from GPT-4 chat model
                    answer, conversation_history = generate_response(relevant_context, question, st.session_state.conversation_history)
                    # Update the conversation history in session state
                    st.session_state.conversation_history = conversation_history
                    # Display the answer
                    st.write(f"Answer: {answer}")
            # End conversation button to reset chat history
            if st.button("END CONVERSATION"):
                st.session_state.conversation_history = []  # Reset conversation history
                st.write("Conversation has been reset. Feel free to ask new questions.")
        else:
            st.warning("Please upload a PDF file to proceed.")
    else:
        st.warning("Please enter your OpenAI API Key to use the chatbot.")

if __name__ == "__main__":
    main()
|