import os
import tempfile
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOpenAI
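# NOTE: these imports assume a pre-0.2 LangChain install alongside the
# langchain-community package; on newer releases some of them have moved
# (e.g. ChatOpenAI now lives in the langchain-openai package).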
# Streamlit App Title
st.title("DeepSeek-Powered RAG Chatbot")
# Step 1: Input API Key
api_key = st.text_input("Enter your DeepSeek API Key:", type="password")

if api_key:
    # Set the API key as an environment variable (optional)
    os.environ["DEEPSEEK_API_KEY"] = api_key

    # Step 2: Upload PDF Document
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

    # Use session state to persist the vector_store
    if "vector_store" not in st.session_state:
        st.session_state.vector_store = None
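    # Streamlit re-executes this script from the top on every interaction, so
    # the FAISS index is kept in st.session_state to avoid re-loading and
    # re-embedding the document on each rerun.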
    if uploaded_file and st.session_state.vector_store is None:
        try:
            with st.spinner("Processing document..."):
                # Save the uploaded file temporarily so PyPDFLoader can read it from disk
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                # Use the temporary file path with PyPDFLoader
                loader = PyPDFLoader(tmp_file_path)
                documents = loader.load()

                # Remove the temporary file
                os.unlink(tmp_file_path)

                # Split the document into overlapping chunks
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
                chunks = text_splitter.split_documents(documents)

                # Generate embeddings and store them in a vector database
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
                st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)
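                # Note: only the chat model below calls the DeepSeek API; embedding
                # and similarity search run locally via sentence-transformers and FAISS.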
            st.success("Document processed successfully!")
        except Exception as e:
            st.error(f"Error processing document: {e}")
            st.stop()
    # Step 3: Ask Questions About the Document
    if st.session_state.vector_store:
        st.subheader("Chat with Your Document")
        user_query = st.text_input("Ask a question:")

        if user_query:
            try:
                # Set up the RAG pipeline with the DeepSeek LLM
                retriever = st.session_state.vector_store.as_retriever()
                llm = ChatOpenAI(
                    model="deepseek-chat",
                    openai_api_key=api_key,
                    openai_api_base="https://api.deepseek.com/v1",
                    temperature=0.85,
                    max_tokens=1000,  # Adjust token limit for safety
                )
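                # RetrievalQA.from_chain_type defaults to the "stuff" chain type,
                # which packs all retrieved chunks into a single prompt for the LLM.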
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

                # Generate response
                with st.spinner("Generating response..."):
                    response = qa_chain.run(user_query)
                st.write(f"**Answer:** {response}")
            except Exception as e:
                st.error(f"Error generating response: {e}")
else:
    st.warning("Please enter your DeepSeek API key to proceed.")
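# To run locally (assuming this file is saved as app.py):
#   pip install streamlit langchain langchain-community pypdf faiss-cpu sentence-transformers
#   streamlit run app.py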