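"""Streamlit app: upload a PDF or DOCX file, then ask questions about it.

Pipeline: extract the document text, split it into fixed-size chunks,
embed the chunks with a sentence-transformer model, index them in FAISS,
and send the chunks most similar to the user's question to a Groq-hosted
LLM, which generates the answer.
"""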
import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Initialize Groq API Client
client = Groq(api_key=os.environ.get("Groq_Api"))

# Title with Book Icon
st.title("📖 Q&A From a File")

# File Upload
uploaded_file = st.file_uploader("Upload a PDF or DOCX file", type=["pdf", "docx"])

if uploaded_file:
    st.write(f"**File Name:** {uploaded_file.name}")  # Display file name

    # Extract Text
    def extract_text(file):
        if file.name.endswith(".pdf"):
            reader = PdfReader(file)
            return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
        elif file.name.endswith(".docx"):
            doc = Document(file)
            return "\n".join([para.text for para in doc.paragraphs])
        return ""
    file_text = extract_text(uploaded_file)

    if file_text:
        st.success("File uploaded and text extracted successfully!")
        st.write("Ask a question about the file:")
        query = st.text_input("Enter your question")

        if query:
            # Load Sentence Transformer Model
            model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

            # Chunk & Embed Text
            chunk_size = 512
            chunks = [file_text[i:i + chunk_size] for i in range(0, len(file_text), chunk_size)]
            embeddings = model.encode(chunks, convert_to_numpy=True)

            # Build FAISS Index for Fast Retrieval
            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)
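            # IndexFlatL2 does exact (brute-force) nearest-neighbor search over
            # L2 distance, which is fine for a single document's worth of chunks.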
            # Query Embedding
            query_embedding = model.encode([query], convert_to_numpy=True)
            k = min(3, len(chunks))  # FAISS returns index -1 if k exceeds the number of chunks
            _, retrieved_idx = index.search(query_embedding, k)

            # Retrieve Top-k Relevant Chunks
            relevant_text = " ".join([chunks[i] for i in retrieved_idx[0]])
            # Query Groq API with relevant chunks only
            chat_completion = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": f"Answer based on this document: {query}\n\n{relevant_text}"},
                ],
                model="llama-3.3-70b-versatile",
            )

            # Display Answer
            answer = chat_completion.choices[0].message.content
            st.subheader("Answer:")
            st.write(answer)
    else:
        st.error("Failed to extract text from the file. Please check the format.")
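
# Typical local run (assuming this file is saved as app.py and the
# Groq_Api environment variable holds a valid Groq API key):
#   streamlit run app.py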