DeepSeek-R1-TestRag

Sleeping

File size: 4,767 Bytes

import logging
import os
import tempfile

import requests
import streamlit as st
from langchain.document_loaders import PDFPlumberLoader
from langchain.llms import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline

# Logging: INFO and above via the root configuration; this module logs under
# its own name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Page title, icon and layout for the app
st.set_page_config(
    page_title="DeepSeek Chatbot - ruslanmv.com",
    page_icon="🤖",
    layout="centered",
)

# Create an empty chat transcript the first time "messages" is missing from
# the session state
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Sidebar: model selection and generation settings
st.sidebar.header("Model Configuration")
st.sidebar.markdown("[Get HuggingFace Token](https://huggingface.co/settings/tokens)")

# Only one model is offered; the select box still determines `selected_model`
model_options = ["deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"]
selected_model = st.sidebar.selectbox("Select Model", model_options, index=0)

system_message = st.sidebar.text_area(
    "System Message",
    value="You are a friendly chatbot. Provide clear, accurate, and brief answers.",
    height=100,
)

# Sliders are given as (min, max, initial value)
max_tokens = st.sidebar.slider("Max Tokens", min_value=10, max_value=4000, value=100)
temperature = st.sidebar.slider("Temperature", min_value=0.1, max_value=4.0, value=0.3)
top_p = st.sidebar.slider("Top-p", min_value=0.1, max_value=1.0, value=0.6)

# Function to query the Hugging Face API
def query(payload, api_url, timeout=120):
    """POST ``payload`` as JSON to ``api_url`` and return the decoded reply.

    The request carries the ``HF_TOKEN`` value from Streamlit secrets as a
    bearer token in the ``Authorization`` header.

    Args:
        payload: JSON-serialisable request body.
        api_url: URL of the endpoint to call.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout ``requests`` waits indefinitely,
            which would freeze the app on a stalled endpoint.

    Returns:
        The parsed JSON response, or ``None`` if the response body is not
        valid JSON.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}
    logger.info("Sending request to %s with payload: %s", api_url, payload)
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    logger.info("Received response: %s, %s", response.status_code, response.text)
    try:
        return response.json()
    except ValueError:
        # Every JSON decode error that Response.json() can raise derives from
        # ValueError, and unlike requests.exceptions.JSONDecodeError the name
        # is available in every requests release.
        logger.error("Failed to decode JSON response: %s", response.text)
        return None

# Function to load and process PDF
def process_pdf(uploaded_file):
    """Load an uploaded PDF and split its content into overlapping chunks.

    The upload is written to a temporary ``.pdf`` file because the loader is
    handed a file path. The temporary file is always removed afterwards,
    whether or not loading succeeds.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF
            bytes (here, the value produced by ``st.file_uploader``).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` with
        ``chunk_size=1000``, ``chunk_overlap=200`` and ``add_start_index=True``.
    """
    # delete=False so the file survives being closed; the handle is closed
    # before the loader opens the path.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with temp_file:
            temp_file.write(uploaded_file.getvalue())

        # Use PDFPlumberLoader to load the PDF from the temporary file
        documents = PDFPlumberLoader(temp_file.name).load()
    finally:
        # Without this every call would leave a stray file in the temp dir.
        os.unlink(temp_file.name)

    # Split the documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    return text_splitter.split_documents(documents)

@st.cache_resource
def _load_llm(model_name):
    """Build the LangChain text-generation LLM for ``model_name``.

    Decorated with ``st.cache_resource`` so repeated calls with the same
    model name reuse the existing object instead of constructing the
    transformers pipeline again for every question.
    """
    # HuggingFacePipeline takes the transformers pipeline via the `pipeline`
    # keyword; it cannot be passed positionally.
    return HuggingFacePipeline(pipeline=pipeline("text-generation", model=model_name))


# Function to generate response using LangChain
def generate_response_with_langchain(question, context):
    """Answer ``question`` from ``context`` with the model selected in the sidebar.

    Args:
        question: The user's question; fills ``{question}`` in the prompt.
        context: Text the answer should be based on; fills ``{context}``.

    Returns:
        The result of invoking the prompt-to-model chain (presumably the
        generated text as a string; confirm against the installed LangChain
        version).

    NOTE(review): the sidebar values ``system_message``, ``max_tokens``,
    ``temperature`` and ``top_p`` are not used by this function.
    """
    prompt_template = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Question: {question} 
    Context: {context} 
    Answer:
    """

    prompt = ChatPromptTemplate.from_template(prompt_template)
    model = _load_llm(selected_model)

    # Use LangChain to generate an answer
    chain = prompt | model
    return chain.invoke({"question": question, "context": context})

# Chat interface
st.title("🤖 DeepSeek Chatbot")
st.caption("Powered by Hugging Face Inference API - Configure in sidebar")

# Display chat history stored in the session state
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle input and PDF processing
uploaded_file = st.file_uploader("Upload PDF", type="pdf", accept_multiple_files=False)
documents = None  # Chunks of the uploaded PDF; stays None until parsing succeeds

if uploaded_file:
    # Parsing is guarded separately from answer generation so that an
    # unreadable PDF is reported as a message instead of an unhandled error.
    try:
        documents = process_pdf(uploaded_file)
    except Exception as e:
        logger.error(f"PDF Processing Error: {str(e)}", exc_info=True)
        st.error(f"PDF Processing Error: {str(e)}")

    # Parsing succeeded but produced no chunks: tell the user why no question
    # box is shown.
    if documents is not None and not documents:
        st.warning("No readable text was found in the uploaded PDF.")

    # The whole document text is passed to the model as context
    context = "\n\n".join(doc.page_content for doc in documents or [])

    # Placeholder text displayed inside the chat input box
    prompt_input = "Ask a question about the PDF content"

    # Show the PDF-based question input only when there is content to ask about
    prompt = st.chat_input(prompt_input) if documents else None

    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("user"):
            st.markdown(prompt)

        try:
            with st.spinner("Generating response..."):
                answer = generate_response_with_langchain(prompt, context)

            # Show the answer from LangChain model
            with st.chat_message("assistant"):
                st.markdown(answer)

            st.session_state.messages.append({"role": "assistant", "content": answer})

        except Exception as e:
            # Top-level boundary: log with traceback and surface the message
            # in the UI.
            logger.error(f"Application Error: {str(e)}", exc_info=True)
            st.error(f"Application Error: {str(e)}")