christian1984 committed on
Commit
cd24b00
·
verified ·
1 Parent(s): 98c776b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import requests
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+
9
# Hugging Face access token, read from the environment; None when unset.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")

# Hosted Inference API endpoint of the model that generates the answers.
HF_API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b"

# Bearer-token header sent with every request to HF_API_URL.
HEADERS = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}

# Directory where uploaded PDFs are written before they are parsed.
pdfs_directory = "./pdfs/"

# Prompt skeleton; {question} and {context} are filled in by answer_question().
template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

# InMemoryVectorStore requires an embedding model; constructing it without
# one raises TypeError at import time. Embeddings are computed through the
# Hugging Face Inference API with the same token used for generation.
# NOTE(review): the embedding model below is a reasonable default, not a
# choice made by the original author - swap it if another model is preferred.
vector_store = InMemoryVectorStore(
    HuggingFaceInferenceAPIEmbeddings(
        # The field rejects None, so fall back to an empty token when unset.
        api_key=HUGGINGFACE_API_KEY or "",
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
)
25
+
26
+
27
def upload_pdf(file):
    """Save an uploaded PDF into ``pdfs_directory`` and return the saved path.

    Args:
        file: Uploaded-file object exposing ``name`` and ``getbuffer()``
            (the caller passes the result of the Streamlit file uploader).

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If ``file.name`` contains no usable file name.
    """
    # The name is supplied by the client; keep only its final component so a
    # crafted name such as "../../x.pdf" cannot escape the upload directory.
    safe_name = os.path.basename(file.name)
    if not safe_name or safe_name in {".", ".."}:
        raise ValueError(f"Invalid upload file name: {file.name!r}")

    # The upload directory is not guaranteed to exist on a fresh deployment.
    os.makedirs(pdfs_directory, exist_ok=True)

    target_path = os.path.join(pdfs_directory, safe_name)
    with open(target_path, "wb") as f:
        f.write(file.getbuffer())
    return target_path
31
+
32
+
33
def load_pdf(file_path):
    """Parse the PDF at ``file_path`` with PDFPlumberLoader and return its documents."""
    return PDFPlumberLoader(file_path).load()
39
+
40
+
41
+
42
def split_text(documents):
    """Break the documents into overlapping chunks for indexing.

    The splitter is configured with chunk_size=1000, chunk_overlap=200 and
    add_start_index enabled.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "add_start_index": True,
    }
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    return chunker.split_documents(documents)
51
+
52
+
53
def index_docs(documents):
    """Add the given documents to the module-level vector store."""
    store = vector_store
    store.add_documents(documents)
55
+
56
+
57
+
58
def retrieve_docs(query):
    """Return the vector store's similarity-search results for ``query``."""
    matches = vector_store.similarity_search(query)
    return matches
61
+
62
+
63
def query_huggingface_api(prompt, timeout=60):
    """Send a prompt to the hosted model at ``HF_API_URL`` and return its text.

    Args:
        prompt: Fully formatted prompt string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``generated_text`` of the first result, or the string
        ``"Error: Unable to process request."`` when the request fails, the
        server answers with a non-200 status, or the response body does not
        have the expected shape.
    """
    error_message = "Error: Unable to process request."

    try:
        # A timeout keeps a stalled endpoint from hanging the app forever.
        response = requests.post(
            HF_API_URL,
            headers=HEADERS,
            json={"inputs": prompt},
            timeout=timeout,
        )
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failure.
        return error_message

    if response.status_code != 200:
        return error_message

    try:
        return response.json()[0]["generated_text"]
    except (ValueError, LookupError, TypeError):
        # Body was not JSON, or not a non-empty list of result dicts.
        return error_message
71
+
72
+
73
def answer_question(question, documents):
    """Build the prompt from the retrieved documents and query the hosted model."""
    # Retrieved chunks are separated by blank lines inside the prompt context.
    context = "\n\n".join(doc.page_content for doc in documents)
    prompt_template = ChatPromptTemplate.from_template(template)
    filled_prompt = prompt_template.format(question=question, context=context)
    return query_huggingface_api(filled_prompt)
78
+
79
+
80
# --- Streamlit page: upload a PDF, index it, then answer questions about it ---
st.title("PDF-based RAG Chatbot")

uploaded_file = st.file_uploader("Upload PDF", type="pdf", accept_multiple_files=False)

if uploaded_file:
    # Persist the upload, then parse, chunk and index it.
    upload_pdf(uploaded_file)
    pdf_path = pdfs_directory + uploaded_file.name
    index_docs(split_text(load_pdf(pdf_path)))

    question = st.chat_input("Ask a question based on the document")

    if question:
        # Echo the user's message before retrieving context and answering.
        st.chat_message("user").write(question)
        reply = answer_question(question, retrieve_docs(question))
        st.chat_message("assistant").write(reply)