Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -33,42 +33,41 @@ def upload_pdf(file):
|
|
33 |
|
34 |
def load_pdf(file_path):
|
35 |
"""Load text from the PDF"""
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
|
40 |
|
41 |
|
42 |
|
43 |
def split_text(documents):
|
44 |
"""Split text into smaller chunks"""
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
return text_splitter.split_documents(documents)
|
52 |
|
53 |
|
54 |
def index_docs(documents):
|
55 |
-
|
56 |
|
57 |
|
58 |
|
59 |
def retrieve_docs(query):
|
60 |
"""Retrieve similar documents"""
|
61 |
-
|
62 |
|
63 |
|
64 |
def query_huggingface_api(prompt):
|
65 |
"""Send query to DeepSeek R1 model on Hugging Face"""
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
|
74 |
def answer_question(question, documents):
|
@@ -80,7 +79,11 @@ def answer_question(question, documents):
|
|
80 |
|
81 |
st.title("PDF-based RAG Chatbot")
|
82 |
|
83 |
-
uploaded_file = st.file_uploader(
|
|
|
|
|
|
|
|
|
84 |
|
85 |
if uploaded_file:
|
86 |
upload_pdf(uploaded_file)
|
|
|
33 |
|
34 |
def load_pdf(file_path):
    """Read the PDF at ``file_path`` and return the loaded documents.

    The file is opened with ``PDFPlumberLoader`` and the result of its
    ``load()`` call is returned unchanged.
    """
    return PDFPlumberLoader(file_path).load()
|
40 |
|
41 |
|
42 |
|
43 |
def split_text(documents):
    """Break the loaded documents into smaller, overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000``, ``chunk_overlap=200`` and ``add_start_index=True``,
    and returns the result of ``split_documents(documents)``.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = splitter.split_documents(documents)
    return chunks
|
|
|
51 |
|
52 |
|
53 |
def index_docs(documents):
    """Add ``documents`` to the module-level ``vector_store``.

    Nothing is returned; the call is made for its effect on the store.
    """
    store = vector_store
    store.add_documents(documents)
|
55 |
|
56 |
|
57 |
|
58 |
def retrieve_docs(query):
    """Look up documents similar to ``query`` in the vector store.

    Returns whatever ``vector_store.similarity_search(query)`` produces.
    """
    matches = vector_store.similarity_search(query)
    return matches
|
61 |
|
62 |
|
63 |
def query_huggingface_api(prompt, timeout=120):
    """Send a prompt to the Hugging Face endpoint and return the generated text.

    The original docstring describes the endpoint as a DeepSeek R1 model; the
    actual target is whatever ``HF_API_URL`` (defined elsewhere in this file)
    points at.

    Args:
        prompt: Text sent as the ``"inputs"`` field of the JSON payload.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The ``"generated_text"`` value of the first item in the JSON response
        on success, otherwise the string
        ``"Error: Unable to process request."``.
    """
    error_message = "Error: Unable to process request."
    payload = {"inputs": prompt}

    try:
        response = requests.post(
            HF_API_URL, headers=HEADERS, json=payload, timeout=timeout
        )
    except requests.RequestException:
        # Connection failures and timeouts are reported the same way as a
        # non-200 status instead of raising into the caller.
        return error_message

    if response.status_code != 200:
        return error_message

    try:
        return response.json()[0]["generated_text"]
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 reply whose body is not JSON, or is not a non-empty list of
        # dicts carrying "generated_text", is treated as a failed request.
        return error_message
|
71 |
|
72 |
|
73 |
def answer_question(question, documents):
|
|
|
79 |
|
80 |
st.title("PDF-based RAG Chatbot")

# Upload widget restricted to a single file of type "pdf".
uploaded_file = st.file_uploader("Upload PDF", type="pdf", accept_multiple_files=False)
|
87 |
|
88 |
if uploaded_file:
|
89 |
upload_pdf(uploaded_file)
|