deepseek-chat / app.py
chalisesagun's picture
Update app.py
5f5357d verified
raw
history blame
3.11 kB
import hashlib
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# Session-state keys that keep the indexed document alive across Streamlit
# reruns (the whole script is re-executed on every widget interaction).
_VECTOR_STORE_KEY = "rag_vector_store"
_FILE_DIGEST_KEY = "rag_file_digest"


def _build_vector_store(pdf_bytes: bytes) -> FAISS:
    """Parse a PDF given as raw bytes and index its text in a FAISS store.

    Args:
        pdf_bytes: Full content of the uploaded PDF file.

    Returns:
        A FAISS vector store built from 500-character chunks (50 overlap)
        embedded with ``sentence-transformers/all-MiniLM-L6-v2``.

    Raises:
        ValueError: If splitting the document produces no chunks, e.g. the
            PDF contains no extractable text.
    """
    # PyPDFLoader is given a file path, so the upload is written to disk
    # first. delete=False keeps the file after the handle is closed, which
    # lets the loader open it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_bytes)
        tmp_file_path = tmp_file.name
    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # Always remove the temporary file, including when parsing fails;
        # otherwise every bad upload would leave a stray file behind.
        os.unlink(tmp_file_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Fail with a readable message instead of an opaque error from the
        # vector store when there is nothing to embed.
        raise ValueError("no extractable text found in the PDF")

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embeddings)


def _answer_question(vector_store: FAISS, api_key: str, question: str) -> str:
    """Answer ``question`` from the indexed document using the DeepSeek chat model.

    Args:
        vector_store: Store returned by ``_build_vector_store``.
        api_key: DeepSeek API key entered by the user.
        question: The user's question about the document.

    Returns:
        The answer text produced by the retrieval-QA chain.
    """
    # DeepSeek is reached through the OpenAI-compatible client by pointing
    # it at the DeepSeek base URL.
    llm = ChatOpenAI(
        model="deepseek-chat",
        openai_api_key=api_key,
        openai_api_base="https://api.deepseek.com/v1",
        temperature=0.85,
        max_tokens=1000,  # Adjust token limit for safety
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm, retriever=vector_store.as_retriever()
    )
    return qa_chain.run(question)


# Streamlit App Title
st.title("📄 DeepSeek-Powered RAG Chatbot")

# Step 1: Input API Key
api_key = st.text_input("🔑 Enter your DeepSeek API Key:", type="password")

if api_key:
    # Set the API key as an environment variable (optional).
    # NOTE(review): this is process-wide and nothing in this file reads it
    # back (the key is passed to ChatOpenAI explicitly) -- confirm it is
    # still wanted, especially if several users share one server process.
    os.environ["DEEPSEEK_API_KEY"] = api_key

    # Step 2: Upload PDF Document
    uploaded_file = st.file_uploader("📂 Upload a PDF document", type=["pdf"])

    if uploaded_file:
        file_bytes = uploaded_file.getvalue()
        file_digest = hashlib.sha256(file_bytes).hexdigest()

        # Only (re)build the index when a different file is uploaded, so that
        # asking a question does not re-parse and re-embed the whole PDF.
        already_indexed = (
            _FILE_DIGEST_KEY in st.session_state
            and st.session_state[_FILE_DIGEST_KEY] == file_digest
            and _VECTOR_STORE_KEY in st.session_state
        )
        if not already_indexed:
            try:
                with st.spinner("Processing document..."):
                    st.session_state[_VECTOR_STORE_KEY] = _build_vector_store(
                        file_bytes
                    )
                # Record the digest only after a successful build so a failed
                # attempt is retried on the next rerun.
                st.session_state[_FILE_DIGEST_KEY] = file_digest
            except Exception as exc:
                st.error(f"Error processing document: {exc}")
                st.stop()

        st.success("Document processed successfully!")
        vector_store = st.session_state[_VECTOR_STORE_KEY]

        # Step 3: Ask Questions About the Document
        st.subheader("💬 Chat with Your Document")
        user_query = st.text_input("Ask a question:")

        if user_query:
            try:
                # Generate response
                with st.spinner("Generating response..."):
                    response = _answer_question(vector_store, api_key, user_query)
                st.write(f"**Answer:** {response}")
            except Exception as exc:
                st.error(f"Error generating response: {exc}")
else:
    st.warning("Please enter your DeepSeek API key to proceed.")