abdullahzunorain committed on
Commit
e625c0b
·
verified ·
1 Parent(s): 49b1967

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from groq import Groq
5
+ from PyPDF2 import PdfReader
6
+
7
# Initialize the retriever (sentence-embedding model) and the Groq client.
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# The API key is read from the GROQ_API_KEY environment variable. The previous
# code passed an undefined name (`groq_api`), which raised NameError as soon as
# the script started; reading the environment also keeps the secret out of the
# source file.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Knowledge base (documents) and embeddings.
# `documents` is the list of searchable texts; `document_embeddings` holds one
# embedding per entry, in the same order, so a hit's corpus index can be used
# to look the text up in `documents`.
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence.",
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
20
+
21
+ # Function to retrieve top relevant document and truncate context if too long
22
def retrieve(query, top_k=1, max_tokens=100):
    """Return the knowledge-base text most relevant to *query*.

    The query is embedded with the module-level ``retriever`` and compared
    against ``document_embeddings``; the ``top_k`` best-matching entries of
    ``documents`` are joined (best match first) and the result is cut to at
    most ``max_tokens`` whitespace-separated words.

    Args:
        query: The user's question.
        top_k: Number of documents to retrieve. Previously every hit after
            the first was silently discarded; now all of them contribute to
            the context. With the default of 1 the result is unchanged.
        max_tokens: Maximum number of *words* (not model tokens) kept in the
            returned context.

    Returns:
        The truncated context string, or ``""`` when the search yields no
        hits.
    """
    query_embedding = retriever.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, document_embeddings, top_k=top_k)
    # semantic_search returns one hit list per query; there is a single query.
    top_docs = [documents[hit['corpus_id']] for hit in hits[0]]
    if not top_docs:
        return ""

    # Splitting on whitespace and re-joining both normalises spacing and
    # applies the word limit across the combined documents.
    words = ' '.join(top_docs).split()
    return ' '.join(words[:max_tokens])
31
+
32
+ # Function to generate response using Groq
33
def generate_response(query, context):
    """Ask the Groq chat model to answer *query* using *context*.

    A single user message containing the context and the question is sent to
    the module-level ``client`` with the ``gemma2-9b-it`` model.

    Args:
        query: The user's question.
        context: Retrieved text placed in front of the question.

    Returns:
        The message content of the first choice in the completion.
    """
    prompt = f"Context: {context} Question: {query} Answer:"
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="gemma2-9b-it",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
44
+
45
+ # Function to handle PDF upload and text extraction
46
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: A path or binary file-like object accepted by ``PdfReader``
            (the app passes the Streamlit upload object).

    Returns:
        The page texts concatenated in page order. Pages for which the
        extractor yields nothing contribute an empty string.
    """
    pdf_reader = PdfReader(file)
    # `or ""` guards against an extractor returning None for a page (which
    # would make string concatenation raise TypeError); join avoids the
    # repeated-`+=` string building of the previous loop.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
52
+
53
+ # Function to update knowledge base with new content from PDF
54
def update_knowledge_base(pdf_text, chunk_size=100):
    """Add *pdf_text* to the knowledge base and refresh the embeddings.

    The text is split into consecutive chunks of at most ``chunk_size``
    whitespace-separated words and each chunk becomes its own entry in the
    module-level ``documents`` list. Storing the whole PDF as one entry meant
    a single embedding had to represent the entire file (embedding models
    truncate long inputs) and only the start of the text could ever be
    returned as context.

    Args:
        pdf_text: Text extracted from the uploaded PDF. ``None``, empty or
            whitespace-only text is ignored so no blank document is indexed.
        chunk_size: Maximum number of words per stored chunk.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    global documents, document_embeddings

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    words = (pdf_text or "").split()
    if not words:
        # Nothing extractable (e.g. a scanned PDF): leave the knowledge base
        # and its embeddings untouched.
        return

    chunks = [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
    documents.extend(chunks)
    # Re-encode everything so embeddings stay index-aligned with `documents`.
    document_embeddings = retriever.encode(documents, convert_to_tensor=True)
58
+
59
# Streamlit app layout
st.title("RAG-based Question Answering App")
st.write("Upload a PDF, ask questions based on its content, and get answers!")

# Upload PDF file
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    pdf_text = extract_text_from_pdf(uploaded_file)
    # Only claim success when there is actual text to index; image-only PDFs
    # yield no extractable text and previously still reported success.
    if pdf_text and pdf_text.strip():
        update_knowledge_base(pdf_text)
        st.write("PDF content successfully added to the knowledge base.")
    else:
        st.warning("No extractable text was found in the uploaded PDF.")
69
+
70
# Question input
# Strip the input so a whitespace-only entry is treated as "no question"
# instead of triggering an embedding search and an LLM request.
question = (st.text_input("Enter your question:") or "").strip()
if question:
    retrieved_context = retrieve(question)
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        # No context could be retrieved, so do not query the model at all.
        answer = "I have no knowledge about this topic."
    st.write("Answer:", answer)