Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| from groq import Groq | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
| from sentence_transformers import SentenceTransformer | |
# Initialize the Groq API client.
# The key is read from the "Groq-Api" environment variable; if the variable
# is unset, os.environ.get() returns None and None is passed as api_key.
client = Groq(api_key=os.environ.get("Groq-Api"))

# Page title with a book icon.
# The icon is written as an explicit escape (U+1F4D6, open book) so that it
# cannot be corrupted by an encoding mismatch when the file is saved or
# copied; the previous literal had degraded to a stray Greek letter.
st.title("\U0001F4D6 A&Q From a File")

# File upload widget, restricted to PDF and DOCX files.
uploaded_file = st.file_uploader(
    "Upload a PDF or DOCX file",
    type=["pdf", "docx"],
)

# Echo the name of the uploaded file once one has been provided.
if uploaded_file:
    st.write(f"**File Name:** {uploaded_file.name}")  # Display file name
# Read PDF or DOCX content
def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    The file type is decided from the extension of ``file.name``. The
    comparison is case-insensitive, so names such as ``REPORT.PDF`` or
    ``Notes.Docx`` are handled the same way as their lowercase forms.

    Args:
        file: A file-like object with a ``name`` attribute. It is passed
            directly to ``PdfReader`` or ``Document``.

    Returns:
        For PDFs, the text of every page that yields non-empty text, joined
        with newlines. For DOCX files, the text of every paragraph joined
        with newlines. An empty string for any other extension.
    """
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file)
        # Extract each page exactly once; extraction is the expensive step,
        # so avoid calling it a second time just to filter empty pages.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)

    if filename.endswith(".docx"):
        doc = Document(file)
        return "\n".join(para.text for para in doc.paragraphs)

    # Unsupported extension: signal "nothing extracted" to the caller.
    return ""
# --- Retrieval settings ------------------------------------------------------
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CHUNK_SIZE = 512  # characters per chunk
# Upper bound on the number of document characters placed in the prompt.
# NOTE(review): this is an assumed, conservative budget - tune it to the
# context window and rate limits of the deployed Groq model/plan.
MAX_CONTEXT_CHARS = 24_000


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer(EMBEDDING_MODEL_NAME)


def _split_into_chunks(text, size=CHUNK_SIZE):
    """Split ``text`` into consecutive slices of at most ``size`` characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def _build_context(text, question):
    """Return the part of ``text`` to send to the LLM for ``question``.

    Documents that fit within ``MAX_CONTEXT_CHARS`` are returned unchanged,
    without loading the embedding model. Larger documents are chunked,
    embedded, and reduced to the chunks most similar to the question, kept
    in their original document order.
    """
    if len(text) <= MAX_CONTEXT_CHARS:
        return text

    chunks = _split_into_chunks(text)
    embedder = _load_embedder()
    # With unit-length vectors the dot product equals cosine similarity.
    chunk_vectors = embedder.encode(chunks, normalize_embeddings=True)
    question_vector = embedder.encode(question, normalize_embeddings=True)
    scores = chunk_vectors @ question_vector

    keep = max(1, MAX_CONTEXT_CHARS // CHUNK_SIZE)
    best_first = scores.argsort()[::-1][:keep].tolist()
    # Restore document order so the excerpt reads naturally.
    return "\n\n".join(chunks[index] for index in sorted(best_first))


# Main question-answering flow; runs only once a file has been uploaded.
if uploaded_file:
    file_text = extract_text(uploaded_file)

    if not file_text:
        st.error("Failed to extract text from the file. Please check the format.")
    else:
        st.success("File uploaded and text extracted successfully!")
        st.write("Ask a question about the file:")
        query = st.text_input("Enter your question")

        if query:
            # Select the document text to ground the answer on.
            context = _build_context(file_text, query)

            # Query with Groq API
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Answer based on this document: {query}\n\n{context}",
                    },
                ],
                model="llama-3.3-70b-versatile",
            )

            # Display Answer
            answer = chat_completion.choices[0].message.content
            st.subheader("Answer:")
            st.write(answer)