File size: 3,397 Bytes
91c8836
145163e
91c8836
0312573
781b51d
0312573
 
f032aa3
0312573
91c8836
 
 
 
 
 
 
 
 
 
 
 
 
 
dd76442
 
 
 
 
5f5357d
 
 
 
 
 
145163e
5f5357d
 
 
91c8836
5f5357d
 
145163e
5f5357d
 
 
91c8836
5f5357d
 
dd76442
91c8836
5f5357d
 
 
 
91c8836
dd76442
 
91c8836
 
 
 
5f5357d
 
dd76442
5f5357d
 
 
 
 
 
 
 
91c8836
5f5357d
 
91c8836
 
5f5357d
 
91c8836
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import tempfile
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Correct import
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA  # Correct import
from langchain_community.chat_models import ChatOpenAI

# Streamlit App Title
st.title("πŸ“„ DeepSeek-Powered RAG Chatbot")

# Step 1: Input API Key
api_key = st.text_input("πŸ”‘ Enter your DeepSeek API Key:", type="password")

if api_key:
    # Set the API key as an environment variable (optional)
    os.environ["DEEPSEEK_API_KEY"] = api_key

    # Step 2: Upload PDF Document
    uploaded_file = st.file_uploader("πŸ“‚ Upload a PDF document", type=["pdf"])

    # Persist the vector_store in session state so the FAISS index survives
    # Streamlit reruns; otherwise every widget interaction would rebuild it.
    if "vector_store" not in st.session_state:
        st.session_state.vector_store = None

    if uploaded_file and st.session_state.vector_store is None:
        try:
            with st.spinner("Processing document..."):
                # PyPDFLoader requires a filesystem path, so write the
                # uploaded bytes to a temporary file first.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                try:
                    # Load the PDF into LangChain document objects.
                    loader = PyPDFLoader(tmp_file_path)
                    documents = loader.load()
                finally:
                    # BUGFIX: always remove the temporary file, even when
                    # loading raises — previously a failed load leaked it.
                    os.unlink(tmp_file_path)

                # Split the document into overlapping chunks for retrieval.
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
                chunks = text_splitter.split_documents(documents)

                # Generate embeddings and store them in a vector database.
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
                st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)

            st.success("Document processed successfully!")
        except Exception as e:
            st.error(f"Error processing document: {e}")
            st.stop()

    # Step 3: Ask Questions About the Document
    if st.session_state.vector_store:
        st.subheader("πŸ’¬ Chat with Your Document")
        user_query = st.text_input("Ask a question:")

        if user_query:
            try:
                # Set up the RAG pipeline: retrieve relevant chunks from the
                # FAISS store, then answer with the DeepSeek chat model via
                # its OpenAI-compatible endpoint.
                retriever = st.session_state.vector_store.as_retriever()
                llm = ChatOpenAI(
                    model="deepseek-chat",
                    openai_api_key=api_key,
                    openai_api_base="https://api.deepseek.com/v1",
                    temperature=0.85,
                    max_tokens=1000  # Adjust token limit for safety
                )
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

                # Generate response
                with st.spinner("Generating response..."):
                    response = qa_chain.run(user_query)
                    st.write(f"**Answer:** {response}")
            except Exception as e:
                st.error(f"Error generating response: {e}")
else:
    st.warning("Please enter your DeepSeek API key to proceed.")