import os
import tempfile
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
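
# Note: these import paths match pre-0.2 LangChain. In newer releases the same
# classes live in split-out packages, e.g.
# `from langchain_community.document_loaders import PyPDFLoader` and
# `from langchain_openai import ChatOpenAI`.
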
# Streamlit App Title
st.title("π DeepSeek-Powered RAG Chatbot")
# Step 1: Input API Key
api_key = st.text_input("Enter your DeepSeek API Key:", type="password")

if api_key:
    # Set the API key as an environment variable (optional)
    os.environ["DEEPSEEK_API_KEY"] = api_key

    # Step 2: Upload PDF Document
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

    if uploaded_file:
        # Load and process the document
        try:
            with st.spinner("Processing document..."):
                # Save the uploaded file temporarily
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                # Use the temporary file path with PyPDFLoader
                loader = PyPDFLoader(tmp_file_path)
                documents = loader.load()

                # Remove the temporary file
                os.unlink(tmp_file_path)

                # Split the document into chunks
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
                chunks = text_splitter.split_documents(documents)
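
                # chunk_size and chunk_overlap are measured in characters by
                # default here; the 50-character overlap keeps text that
                # straddles a chunk boundary retrievable from either neighbor.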

                # Generate embeddings and store them in a vector database
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
                vector_store = FAISS.from_documents(chunks, embeddings)
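
                # all-MiniLM-L6-v2 is a compact sentence-transformer model
                # (384-dimensional embeddings) that runs locally, so indexing
                # makes no API calls; only the chat step below hits DeepSeek.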
st.success("Document processed successfully!")
        except Exception as e:
            st.error(f"Error processing document: {e}")
            st.stop()
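
        # Streamlit reruns this script from the top on every interaction, so
        # the block above re-parses and re-embeds the PDF after each question.
        # A minimal sketch of one way to skip the expensive re-embedding,
        # assuming the rest of the app stays as-is, is to key the store on the
        # uploaded file and reuse it via st.session_state, replacing the
        # FAISS.from_documents line above with:
        #
        #     if st.session_state.get("indexed_file") != uploaded_file.name:
        #         st.session_state["vector_store"] = FAISS.from_documents(chunks, embeddings)
        #         st.session_state["indexed_file"] = uploaded_file.name
        #     vector_store = st.session_state["vector_store"]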

        # Step 3: Ask Questions About the Document
        st.subheader("Chat with Your Document")
        user_query = st.text_input("Ask a question:")

        if user_query:
            try:
                # Set up the RAG pipeline with the DeepSeek LLM
                retriever = vector_store.as_retriever()
                llm = ChatOpenAI(
                    model="deepseek-chat",
                    openai_api_key=api_key,
                    openai_api_base="https://api.deepseek.com/v1",
                    temperature=0.85,
                    max_tokens=1000  # Cap response length
                )
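
                # ChatOpenAI works with DeepSeek because the service exposes an
                # OpenAI-compatible endpoint; only the base URL, key, and model
                # name differ from a stock OpenAI setup.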

                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
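
                # from_chain_type defaults to the "stuff" chain type: the
                # retrieved chunks are pasted into a single prompt for the LLM.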

                # Generate response
                with st.spinner("Generating response..."):
                    response = qa_chain.run(user_query)
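
                # Note: Chain.run is deprecated in newer LangChain in favor of
                # qa_chain.invoke({"query": user_query})["result"]; run still
                # works on the older versions these imports target.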
st.write(f"**Answer:** {response}")
except Exception as e:
st.error(f"Error generating response: {e}")
else:
    st.warning("Please enter your DeepSeek API key to proceed.")