import asyncio
import os
import re
from dataclasses import dataclass

import nest_asyncio
import pdfplumber
import streamlit as st
import torch
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models.groq import GroqModel
from streamlit_pdf_viewer import pdf_viewer
from transformers import pipeline

import presentation as customClass

# Load API key
api_key = os.getenv("API_KEY")
if not api_key:
    raise ValueError("API_KEY is not set in the environment variables.")

# Text of the most recently uploaded PDF, one entry per page that yielded
# text. Rebound by extract_data() and read by return_data().
data = []
result_data: list[customClass.PPT] = []

# Initialize models
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
# NOTE(review): `summarizer` is not referenced anywhere in this file; confirm
# whether another module relies on it before removing this load.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Instructions given to the agent that drafts the slide content.
PPT_SYSTEM_PROMPT = """
You are an expert in creating PowerPoint presentations. Create 5 slides:
1. Title Slide: Introduction about the presentation.
2. Methodology Slide: Summarize the methodology in detail.
3. Results Slide: Present key findings in bullet points.
4. Discussion Slide: Summarize implications and limitations.
5. Conclusion Slide: State the overall conclusion.
Each slide should have:
- Title: Clear and concise.
- Text: Short and informative explanation.
- Bullet Points: 3-5 summarized key takeaways.
"""


def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """Split *text* into chunks of at most *max_tokens* words.

    "Tokens" here are whitespace-separated words (``str.split()``), not
    model tokens. Words inside a chunk are re-joined with single spaces,
    so the original whitespace (including newlines) is not preserved.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be >= 1.

    Returns:
        The chunks in order. Empty when *text* contains no words.

    Raises:
        ValueError: If *max_tokens* is smaller than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer.")
    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]


def return_data() -> str:
    """Returns concatenated extracted data."""
    return "\n".join(data)


@dataclass
class SupportDependencies:
    db: str


async def ppt_content(data):
    """Generate PowerPoint content from extracted text using the AI agent.

    The text is split into word chunks; the first chunk starts the
    conversation and every following chunk continues it, each time passing
    along the full message history of the previous run.

    Args:
        data: List of text strings (one per extracted page).

    Returns:
        The structured result (``customClass.PPT``) of the final agent run.
        It is also printed to stdout.

    Raises:
        ValueError: If *data* is empty or contains no words at all.
    """
    if not data:
        raise ValueError("No valid text found for PowerPoint generation.")

    chunks = split_into_token_chunks("\n".join(data))
    if not chunks:
        # Whitespace-only pages yield no chunks; indexing chunks[0] would
        # otherwise fail with an IndexError.
        raise ValueError("No valid text found for PowerPoint generation.")

    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        system_prompt=PPT_SYSTEM_PROMPT,
    )

    message_history: list[ModelMessage] = []
    # Await the async API instead of calling run_sync(): this coroutine is
    # already executing inside a running event loop, and run_sync() would
    # have to re-enter that loop.
    result = await agent.run(
        user_prompt=f"Create a PowerPoint presentation from {chunks[0]}",
        message_history=message_history,
    )
    for chunk in chunks[1:]:
        result = await agent.run(
            user_prompt=f"Continue creating the PowerPoint presentation from {chunk}",
            message_history=result.all_messages(),
        )

    print(result.data)
    return result.data


def ai_ppt(data):
    """Run the PowerPoint generation to completion on a fresh event loop.

    Creates a new event loop, installs it as the current loop, and blocks
    until ``ppt_content`` finishes.

    Args:
        data: List of text strings to generate the presentation from.

    Returns:
        Whatever ``ppt_content`` returns.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # NOTE(review): the loop is never closed; confirm whether that is
    # intentional before adding loop.close().
    return loop.run_until_complete(ppt_content(data=data))


def extract_data(feed):
    """Extract text from a PDF into the module-level ``data`` list.

    ``data`` is reset first, then one entry is appended per page for which
    text extraction returned a non-empty string.

    Args:
        feed: Anything ``pdfplumber.open`` accepts (here, the uploaded file).
    """
    global data
    data = []  # Reset data before extracting
    with pdfplumber.open(feed) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                data.append(page_text)


def main():
    """Main Streamlit app function."""
    st.title("AI-Powered PowerPoint Generator")

    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    if uploaded_file is None:
        return

    extract_data(uploaded_file)

    if st.button("Generate PPT"):
        # Top-level UI boundary: report any failure to the user instead of
        # crashing the app.
        try:
            ai_ppt(data)
            st.success("PowerPoint generation completed!")
        except Exception as e:
            st.error(f"Error generating PPT: {e}")

    # Display PDF
    binary_data = uploaded_file.getvalue()
    pdf_viewer(input=binary_data, width=700)


if __name__ == '__main__':
    nest_asyncio.apply()
    main()