Spaces:
Sleeping
Sleeping
File size: 1,603 Bytes
ba5f07e 41a527e 180125b 20fe924 ba5f07e 030a55c ba5f07e 030a55c ba5f07e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import tempfile
import os
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import Embedding
from langchain_community.embeddings.groq import GroqEmbedding
# Function to process PDF
def process_pdf(file):
    """Copy an uploaded PDF into a named temporary file and return its path.

    Args:
        file: Binary file-like object exposing ``read()`` (for example the
            object returned by ``st.file_uploader``).

    Returns:
        Path of the newly created temporary file, which has a ``.pdf``
        suffix. The file is created with ``delete=False``, so it outlives
        this function and the caller is responsible for removing it.
    """
    # Rewind seekable streams so that an upload which has already been read
    # once is still copied in full instead of yielding an empty file.
    if getattr(file, "seekable", lambda: False)():
        file.seek(0)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(file.read())
        tmpfile_path = tmpfile.name
    return tmpfile_path
# Main function to run the app
def main():
    """Run the Streamlit app: upload a PDF, index it, and search it.

    The uploaded PDF is written to a temporary file, loaded with
    ``PyPDFLoader``, embedded, and indexed in a FAISS vector store. When the
    user enters a query, the text of the five most similar documents is
    written to the page.

    The embedding API key is read from the ``GROQ_API_KEY`` environment
    variable. If it is missing, an error is shown and nothing is indexed.
    """
    st.title("PDF Embedding and Query System")
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # SECURITY: credentials must never be committed to source. Any key that
    # was previously hard-coded here should be treated as leaked and revoked.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.error("GROQ_API_KEY is not set; cannot create embeddings.")
        return

    # process_pdf() creates the file with delete=False, so remove it as soon
    # as its contents have been loaded to avoid leaking one file per run.
    tmp_file_path = process_pdf(uploaded_file)
    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        os.remove(tmp_file_path)

    # Building an index from zero documents fails, so stop early.
    if not documents:
        st.warning("No readable pages were found in the uploaded PDF.")
        return

    # NOTE(review): confirm that GroqEmbedding exists in the installed
    # langchain_community version and accepts an `api_key` argument.
    embeddings = GroqEmbedding(api_key=api_key)
    vector_db = FAISS.from_documents(documents, embeddings)

    query = st.text_input("Enter a query to search:")
    if query:
        for result in vector_db.similarity_search(query, k=5):
            # similarity_search returns Document objects, not dicts; the
            # text lives on the `page_content` attribute.
            st.write(result.page_content)
# Run the app
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|