Ahmadkhan12 committed on
Commit
41a527e
·
verified ·
1 Parent(s): 4c923f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -47
app.py CHANGED
@@ -1,41 +1,53 @@
1
  import streamlit as st
2
- from langchain_community.vectorstores import FAISS # Updated import
 
3
  from langchain.embeddings import OpenAIEmbeddings
4
  from langchain.document_loaders import PyPDFLoader
5
- from langchain.llms import HuggingFacePipeline
6
  from langchain.chains import RetrievalQA
7
- import groqapi
8
 
9
- # Step 1: Initialize Groq API and Llama Model
10
- def load_llama_model(api_key, model_name):
11
- """Load the Llama model using Groq API."""
12
- groqapi.set_api_key(api_key)
13
- return HuggingFacePipeline.from_pretrained(model_name)
14
 
15
- # Step 2: Load and Process PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def process_pdf(pdf_path):
17
  """Load and split the PDF into documents."""
18
  loader = PyPDFLoader(pdf_path)
19
  documents = loader.load_and_split()
20
  return documents
21
 
22
- # Step 3: Create Vector Database
23
  def create_vector_db(documents):
24
  """Create a FAISS vector database from documents."""
25
  embeddings = OpenAIEmbeddings() # Use OpenAI embeddings for vectorization
26
  vector_db = FAISS.from_documents(documents, embeddings)
27
  return vector_db
28
 
29
- # Step 4: Build RAG Pipeline
30
- def build_rag_pipeline(vector_db, llama_model):
31
  """Build the Retrieval-Augmented Generation (RAG) pipeline."""
32
  retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
33
- qa_chain = RetrievalQA.from_chain_type(
34
- retriever=retriever,
35
- llm=llama_model,
36
- return_source_documents=True
37
- )
38
- return qa_chain
39
 
40
  # Streamlit App
41
  def main():
@@ -50,38 +62,25 @@ def main():
50
  documents = process_pdf("uploaded_act.pdf")
51
  st.success("PDF Loaded and Processed Successfully!")
52
 
53
- # Step 2: Input Groq API Key
54
- api_key = st.text_input("Enter your Groq API Key", type="password")
55
- model_name = "llama-3.1-8b-instant"
56
-
57
- if api_key and st.button("Load Llama Model"):
58
- try:
59
- # Load Llama Model
60
- llama_model = load_llama_model(api_key, model_name)
61
- st.success("Llama Model Loaded Successfully!")
62
-
63
- # Build Vector DB and QA Chain
64
- vector_db = create_vector_db(documents)
65
- qa_chain = build_rag_pipeline(vector_db, llama_model)
66
-
67
- # Step 3: Ask Questions
68
- query = st.text_input("Ask a question:")
69
- if query:
70
- with st.spinner("Fetching Answer..."):
71
- response = qa_chain({"query": query})
72
- answer = response["result"]
73
- source_docs = response["source_documents"]
74
 
75
- # Display Answer and Sources
76
- st.write("### Answer:")
77
- st.write(answer)
78
 
79
- st.write("### Sources:")
80
- for doc in source_docs:
81
- st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")
 
 
 
 
82
 
83
- except Exception as e:
84
- st.error(f"Error loading model or processing query: {e}")
85
 
86
  if __name__ == "__main__":
87
  main()
 
1
  import streamlit as st
2
+ import os
3
+ from langchain.vectorstores import FAISS
4
  from langchain.embeddings import OpenAIEmbeddings
5
  from langchain.document_loaders import PyPDFLoader
 
6
  from langchain.chains import RetrievalQA
7
+ from groq import Groq
8
 
9
# Read the Groq API key from the environment (for example a deployment
# secret) instead of committing it to source control. The key that used to be
# hard-coded here has been published in the repository history and should be
# revoked and rotated. Falls back to an empty string when the variable is
# unset so the name is always a str.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
 
 
 
11
 
12
# Initialize Groq client
def initialize_groq_client():
    """Create a Groq client authenticated with the module-level API key.

    Returns:
        A ``Groq`` client constructed with ``GROQ_API_KEY``.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is empty, so the failure is reported
            here instead of surfacing later as an authentication error on
            the first request.
    """
    if not GROQ_API_KEY:
        raise ValueError(
            "GROQ_API_KEY is not set; cannot initialize the Groq client."
        )
    # Keep exporting the key to the process environment, as the original
    # code did, in case other code in the process reads it from there.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
    return Groq(api_key=GROQ_API_KEY)
17
+
18
# Generate response using Groq API
def generate_response(client, query, model_name="llama3-8b-8192", context=None):
    """Generate an answer to *query* using Groq's chat completion endpoint.

    Args:
        client: Object exposing ``chat.completions.create(messages=..., model=...)``.
        query: The user's question, sent as the ``user`` message.
        model_name: Identifier of the chat model to call.
        context: Optional text (for example passages retrieved from the
            vector store) that the model is told to ground its answer in.
            When falsy, only the user message is sent, exactly as before.

    Returns:
        The text of the first completion choice, or ``""`` when the API
        returns a choice whose content is ``None``.

    Raises:
        RuntimeError: If the API response contains no choices.
    """
    messages = []
    if context:
        # Supply retrieved passages as a system message so the answer can be
        # grounded in the uploaded document rather than the model's memory.
        messages.append(
            {
                "role": "system",
                "content": (
                    "Answer the question using only the following context:\n\n"
                    f"{context}"
                ),
            }
        )
    messages.append({"role": "user", "content": query})

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model_name,
    )
    if not chat_completion.choices:
        raise RuntimeError("Groq returned no completion choices.")
    # Normalize a missing content field to an empty string for display.
    return chat_completion.choices[0].message.content or ""
31
+
32
# Load and process PDF
def process_pdf(pdf_path):
    """Read the PDF at *pdf_path* and return it as a list of split documents."""
    # PyPDFLoader does both the loading and the splitting in one call.
    return PyPDFLoader(pdf_path).load_and_split()
38
 
39
# Create FAISS vector database
def create_vector_db(documents):
    """Create a FAISS vector database from documents.

    Args:
        documents: Non-empty sequence of documents to embed and index.

    Returns:
        The FAISS vector store built from *documents*.

    Raises:
        ValueError: If *documents* is empty (for example the PDF yielded no
            extractable text), instead of failing inside the index builder.
    """
    if not documents:
        raise ValueError("No documents to index; the PDF produced no text.")
    # OpenAI embeddings are used for vectorization.
    # NOTE(review): presumably needs OpenAI credentials configured - confirm.
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(documents, embeddings)
45
 
46
# Build RAG pipeline
def build_rag_pipeline(vector_db, groq_client, k=5):
    """Build the retrieval half of the RAG pipeline and pair it with the client.

    Args:
        vector_db: Vector store exposing ``as_retriever(search_type=..., search_kwargs=...)``.
        groq_client: Chat client; returned unchanged alongside the retriever.
        k: Number of similar documents the retriever should return per query.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A ``(retriever, groq_client)`` tuple.
    """
    retriever = vector_db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )
    return retriever, groq_client
 
 
 
 
 
51
 
52
  # Streamlit App
53
  def main():
 
62
  documents = process_pdf("uploaded_act.pdf")
63
  st.success("PDF Loaded and Processed Successfully!")
64
 
65
+ # Initialize Groq Client
66
+ try:
67
+ groq_client = initialize_groq_client()
68
+ st.success("Groq Client Initialized Successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # Build Vector DB and QA Chain
71
+ vector_db = create_vector_db(documents)
72
+ retriever, client = build_rag_pipeline(vector_db, groq_client)
73
 
74
+ # Step 3: Ask Questions
75
+ query = st.text_input("Ask a question:")
76
+ if query:
77
+ with st.spinner("Fetching Answer..."):
78
+ response = generate_response(client, query)
79
+ st.write("### Answer:")
80
+ st.write(response)
81
 
82
+ except Exception as e:
83
+ st.error(f"Error loading client or processing query: {e}")
84
 
85
  if __name__ == "__main__":
86
  main()