Mattral committed
Commit fb40d24 · verified · parent: 63b2477

Update app.py

Files changed (1): app.py (+131 −89)
app.py CHANGED
Old version (left-hand pane of the split diff; removed lines marked "-"):

@@ -1,24 +1,33 @@
- import tempfile
  import streamlit as st
  import requests
  import logging
- from langchain.document_loaders import PDFPlumberLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.prompts import ChatPromptTemplate
- from langchain.chains import LLMChain  # This is used for chaining prompts and models
- from langchain.llms import HuggingFacePipeline
- from transformers import pipeline

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

  # Page configuration
- st.set_page_config(page_title="DeepSeek Chatbot - ruslanmv.com", page_icon="🤖", layout="centered")

- # Initialize session state for chat history
  if "messages" not in st.session_state:
      st.session_state.messages = []

  # Sidebar configuration
  with st.sidebar:
@@ -26,15 +35,63 @@ with st.sidebar:
      st.markdown("[Get HuggingFace Token](https://huggingface.co/settings/tokens)")

      # Dropdown to select model
-     model_options = ["deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"]
      selected_model = st.selectbox("Select Model", model_options, index=0)

-     system_message = st.text_area("System Message", value="You are a friendly chatbot. Provide clear, accurate, and brief answers.", height=100)
      max_tokens = st.slider("Max Tokens", 10, 4000, 100)
      temperature = st.slider("Temperature", 0.1, 4.0, 0.3)
      top_p = st.slider("Top-p", 0.1, 1.0, 0.6)

- # Function to query the Hugging Face API
  def query(payload, api_url):
      headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}
      logger.info(f"Sending request to {api_url} with payload: {payload}")
@@ -46,82 +103,67 @@ def query(payload, api_url):
          logger.error(f"Failed to decode JSON response: {response.text}")
          return None

- # Function to load and process PDF
- def process_pdf(uploaded_file):
-     # Save the uploaded file to a temporary file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-         temp_file.write(uploaded_file.getvalue())
-         temp_file_path = temp_file.name
-
-     # Use PDFPlumberLoader to load the PDF from the temporary file
-     loader = PDFPlumberLoader(temp_file_path)
-     documents = loader.load()
-
-     # Split the documents into chunks
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
-     return text_splitter.split_documents(documents)
-
- # Function to generate response using LangChain
- def generate_response_with_langchain(question, context):
-     prompt_template = """
-     You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
-     Question: {question}
-     Context: {context}
-     Answer:
-     """
-
-     prompt = ChatPromptTemplate.from_template(prompt_template)
-
-     # Initialize HuggingFace pipeline
-     hf_pipeline = pipeline("text-generation", model=selected_model)
-     huggingface_llm = HuggingFacePipeline(pipeline=hf_pipeline)
-
-     # Set up LangChain's LLMChain
-     chain = LLMChain(prompt=prompt, llm=huggingface_llm)
-
-     # Use the chain to invoke the model with context and question
-     response = chain.run({"question": question, "context": context})
-     return response
-
- # Chat interface
- st.title("🤖 DeepSeek Chatbot")
- st.caption("Powered by Hugging Face Inference API - Configure in sidebar")
-
- # Display chat history
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- # Handle input and PDF processing
- uploaded_file = st.file_uploader("Upload PDF", type="pdf", accept_multiple_files=False)
- documents = None  # Initialize the documents variable
-
- if uploaded_file:
-     documents = process_pdf(uploaded_file)
-     context = "\n\n".join([doc.page_content for doc in documents])
-
-     # Show the PDF-based question input if the PDF is uploaded
-     prompt_input = "Ask a question about the PDF content"

- # Show the chat input if PDF is uploaded
- prompt = st.chat_input(prompt_input) if documents else None
-
- if prompt:
-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     with st.chat_message("user"):
-         st.markdown(prompt)
-
-     try:
-         with st.spinner("Generating response..."):
-             answer = generate_response_with_langchain(prompt, context)
-
-         # Show the answer from LangChain model
-         with st.chat_message("assistant"):
-             st.markdown(answer)
-
-         st.session_state.messages.append({"role": "assistant", "content": answer})
-
-     except Exception as e:
-         logger.error(f"Application Error: {str(e)}", exc_info=True)
-         st.error(f"Application Error: {str(e)}")
New version (right-hand pane of the split diff; added lines marked "+", hunk headers kept where lines are elided):

  import streamlit as st
  import requests
  import logging
+ import os
+ from langchain_community.document_loaders import PDFPlumberLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_core.vectorstores import InMemoryVectorStore
+ from langchain.embeddings import HuggingFaceEmbeddings
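Note on the import block: `from langchain.embeddings import HuggingFaceEmbeddings` still resolves in recent LangChain releases but is a deprecated alias; the maintained location is `langchain_community.embeddings`, the same namespace the new `PDFPlumberLoader` import already uses. A minimal sketch of the consistent form, assuming `langchain-community` and `sentence-transformers` are installed:

    # Non-deprecated path for the same class
    # (pip install langchain-community sentence-transformers)
    from langchain_community.embeddings import HuggingFaceEmbeddings

    embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")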
 
 
  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

  # Page configuration
+ st.set_page_config(
+     page_title="DeepSeek Chatbot with RAG - ruslanmv.com",
+     page_icon="🤖",
+     layout="centered"
+ )

+ # Initialize session state for chat history and vector store
  if "messages" not in st.session_state:
      st.session_state.messages = []
+ if "vector_store" not in st.session_state:
+     st.session_state.vector_store = None
+
+ # Set up PDF directory and embedding model
+ pdfs_directory = "./pdfs"
+ os.makedirs(pdfs_directory, exist_ok=True)
+ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
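Because Streamlit re-executes the whole script on every widget interaction, the module-level `HuggingFaceEmbeddings(...)` call above reloads the sentence-transformers weights on each rerun. A sketch of one common mitigation, using Streamlit's `st.cache_resource` (not part of this commit):

    import streamlit as st
    from langchain_community.embeddings import HuggingFaceEmbeddings

    @st.cache_resource  # load the embedding weights once per process, not per rerun
    def get_embedding_model():
        return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    embedding_model = get_embedding_model()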
  # Sidebar configuration
  with st.sidebar:
@@ -26,15 +35,63 @@ with st.sidebar:
      st.markdown("[Get HuggingFace Token](https://huggingface.co/settings/tokens)")

      # Dropdown to select model
+     model_options = [
+         "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+     ]
      selected_model = st.selectbox("Select Model", model_options, index=0)

+     system_message = st.text_area(
+         "System Message",
+         value="You are a helpful assistant created by ruslanmv.com. Use the provided context to answer questions clearly and concisely. If the answer isn't in the context, say you don't know.",
+         height=100
+     )
+
      max_tokens = st.slider("Max Tokens", 10, 4000, 100)
      temperature = st.slider("Temperature", 0.1, 4.0, 0.3)
      top_p = st.slider("Top-p", 0.1, 1.0, 0.6)
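The sidebar link only tells users where to create a token; the app itself reads it from `st.secrets['HF_TOKEN']` in `query()` below. For local runs that means a `.streamlit/secrets.toml` file (on a Hugging Face Space, a secret with the same name); a sketch with a placeholder value:

    # .streamlit/secrets.toml  (placeholder token; never commit a real one)
    HF_TOKEN = "hf_xxxxxxxxxxxxxxxxxxxxxxxx"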
+ # Main interface
+ st.title("🤖 DeepSeek Chatbot with RAG")
+ st.caption("Powered by Hugging Face Inference API - Configure in sidebar")
+
+ # PDF upload section
+ uploaded_file = st.file_uploader(
+     "Upload a PDF for context",
+     type="pdf",
+     accept_multiple_files=False
+ )
+
+ if uploaded_file:
+     try:
+         # Save uploaded PDF
+         pdf_path = os.path.join(pdfs_directory, uploaded_file.name)
+         with open(pdf_path, "wb") as f:
+             f.write(uploaded_file.getbuffer())
+
+         # Load and process PDF
+         loader = PDFPlumberLoader(pdf_path)
+         documents = loader.load()
+
+         # Split text into chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=200
+         )
+         chunks = text_splitter.split_documents(documents)
+
+         # Create and store vector store
+         vector_store = InMemoryVectorStore.from_documents(chunks, embedding_model)
+         st.session_state.vector_store = vector_store
+         st.success("PDF processed and indexed successfully!")
+     except Exception as e:
+         st.error(f"Error processing PDF: {str(e)}")
+
+ # Display chat history
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ # Function to query Hugging Face API
  def query(payload, api_url):
      headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}
      logger.info(f"Sending request to {api_url} with payload: {payload}")
@@ -46,82 +103,67 @@ def query(payload, api_url):
          logger.error(f"Failed to decode JSON response: {response.text}")
          return None
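The diff leaves the middle of `query()` untouched, so only its first and last lines appear here. Judging from the log calls that are shown, the elided body posts the payload and decodes the JSON reply; a hypothetical reconstruction, not lines the diff actually shows:

    # Hypothetical middle of query(), inferred from the visible log messages
    response = requests.post(api_url, headers=headers, json=payload)
    try:
        return response.json()
    except ValueError:
        logger.error(f"Failed to decode JSON response: {response.text}")
        return None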

+ # Handle user input
+ if prompt := st.chat_input("Type your message..."):
+     st.session_state.messages.append({"role": "user", "content": prompt})
+
+     with st.chat_message("user"):
+         st.markdown(prompt)
+     try:
+         with st.spinner("Generating response..."):
+             # Check if vector store is available
+             if not st.session_state.vector_store:
+                 st.error("Please upload a PDF first to provide context.")
+                 st.stop()
+
+             # Retrieve relevant documents
+             vector_store = st.session_state.vector_store
+             related_docs = vector_store.similarity_search(prompt, k=3)
+
+             # Build context
+             context = "\n\n".join([doc.page_content for doc in related_docs])
+
+             # Prepare full prompt
+             full_prompt = (
+                 f"{system_message}\n\n"
+                 f"Context: {context}\n\n"
+                 f"User: {prompt}\n"
+                 "Assistant:"
+             )
+
+             # Prepare API payload
+             payload = {
+                 "inputs": full_prompt,
+                 "parameters": {
+                     "max_new_tokens": max_tokens,
+                     "temperature": temperature,
+                     "top_p": top_p,
+                     "return_full_text": False
+                 }
+             }
+
+             # Query API
+             api_url = f"https://api-inference.huggingface.co/models/{selected_model}"
+             output = query(payload, api_url)
+
+             # Handle response
+             if output and isinstance(output, list) and len(output) > 0:
+                 if 'generated_text' in output[0]:
+                     assistant_response = output[0]['generated_text'].strip()
+
+                     with st.chat_message("assistant"):
+                         st.markdown(assistant_response)
+
+                     st.session_state.messages.append({
+                         "role": "assistant",
+                         "content": assistant_response
+                     })
+                 else:
+                     st.error("Unexpected response format from the model")
+             else:
+                 st.error("No response generated - please try again")
+
+     except Exception as e:
+         logger.error(f"Error: {str(e)}", exc_info=True)
+         st.error(f"An error occurred: {str(e)}")