# -*- coding: utf-8 -*-
"""PDF summarizer Gradio app (exported from the Colab notebook ``app.ipynb``).

Original notebook is located at
    https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi

The app lets a user upload a PDF through a Gradio interface, extracts the
document text with PyPDF2 and asks a Groq-hosted chat model for a summary.
"""

import os

import PyPDF2
import gradio as gr
from langchain_groq.chat_models import ChatGroq

# Maximum number of document characters placed in a single prompt.
MAX_PROMPT_CHARS = 10000

# Set Groq API key securely
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Fetch from environment variables

# Fail at start-up, not on the first request, when the key is missing.
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Initialize LLM (Mixtral 8x7B served by Groq).
# NOTE(review): the key is not passed explicitly; ChatGroq is expected to read
# GROQ_API_KEY from the environment itself - confirm against langchain-groq.
llm = ChatGroq(model_name="mixtral-8x7b-32768")


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one trailing newline per page.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (it is passed through
            unchanged).

    Returns:
        The concatenated page texts. Pages for which PyPDF2 yields no text
        are skipped, so the result is ``""`` when nothing could be extracted.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join once instead of growing a string with += inside the loop.
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)


def summarize_text(text, max_chars=MAX_PROMPT_CHARS):
    """Ask the LLM for a summary of ``text``.

    Args:
        text: Document text to summarize.
        max_chars: Upper bound on how many leading characters of ``text``
            are included in the prompt (defaults to ``MAX_PROMPT_CHARS``).

    Returns:
        The content of the model's reply.
    """
    prompt = f"Summarize the following document:\n\n{text[:max_chars]}"  # Limit input size
    # ``invoke`` replaces the deprecated ``predict`` helper; the reply text
    # lives on the returned message's ``content`` attribute.
    return llm.invoke(prompt).content


def process_pdf(file):
    """Extract the text of an uploaded PDF and summarize it with Mixtral 8x7B.

    Args:
        file: The value Gradio passes for the ``gr.File`` input, or ``None``
            when nothing was uploaded.

    Returns:
        The summary, or a short explanatory message when there is no upload
        or the PDF contains no extractable text.
    """
    if file is None:
        return "No file uploaded."

    text = extract_text_from_pdf(file)
    if not text.strip():
        # Nothing to summarize (e.g. image-only pages); skip the LLM call
        # instead of sending an empty document.
        return "No extractable text found in the PDF."

    # Truncation to the prompt budget happens inside summarize_text.
    return summarize_text(text)


# Create Gradio Interface
interface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Upload a PDF"),
    outputs="text",
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get a summary",
)

# Run the app
interface.launch()