Spaces:
Running
Running
File size: 3,774 Bytes
5d14cc6 c8ecbd4 f78e807 c8ecbd4 4ba3023 c8ecbd4 ec1c0d9 c52e882 d553fab c8ecbd4 a8e9d4c c8ecbd4 ec1c0d9 c8ecbd4 ec1c0d9 c8ecbd4 0e1f166 c8ecbd4 6993c74 f2fb591 185bc0f f2fb591 c8ecbd4 2afa0ec 299c4e4 c8ecbd4 299c4e4 fae7389 c8ecbd4 2afa0ec 5d14cc6 c8ecbd4 2211ca4 8028338 c8ecbd4 b8dc120 c8ecbd4 766236b c8ecbd4 c52e882 c8ecbd4 5d14cc6 c8ecbd4 5d14cc6 ec1c0d9 c8ecbd4 86551a1 c8ecbd4 a91875b c8ecbd4 0042245 a91875b 612c0bf c8ecbd4 612c0bf c8ecbd4 a91875b a5f868b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import asyncio
import os
import re
import pdfplumber
import streamlit as st
import torch
from transformers import pipeline
from dataclasses import dataclass
from streamlit_pdf_viewer import pdf_viewer
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.groq import GroqModel
from pydantic_ai.messages import ModelMessage
import presentation as customClass
import nest_asyncio
# Load API key
# Checked at import time so a missing key is reported before any model is built;
# the value is handed to the Groq model constructed below.
api_key = os.getenv("API_KEY")
if not api_key:
    raise ValueError("API_KEY is not set in the environment variables.")
# Text fragments extracted from the uploaded PDF. Rebound by extract_data()
# and read by return_data().
data: list[str] = []
# NOTE(review): nothing in the visible code appends to or reads this list —
# confirm whether it is still needed.
result_data: list[customClass.PPT] = []
# Initialize models
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
# NOTE(review): `summarizer` is not referenced in the visible code, yet the
# pipeline is constructed on every import — confirm it is used elsewhere.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """
    Split a long string into chunks of at most ``max_tokens`` tokens.

    A "token" here is a whitespace-delimited word (``str.split()``), not a
    model-tokenizer token. Words inside a chunk are re-joined with single
    spaces, so the original whitespace (newlines, repeated spaces) is not
    preserved.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in their original order; an empty list when ``text``
        contains no words.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    # A negative step would make range() yield nothing and silently drop the
    # whole text; zero would fail with an unrelated-looking range() error.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")
    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def return_data() -> str:
    """Returns concatenated extracted data."""
    # NOTE: this function is registered as an agent tool (tools=[return_data]);
    # the docstring above may be surfaced to the model as the tool description,
    # so its wording is deliberately left unchanged.
    line_break = "\n"
    # Reads the module-level `data` list at call time, one fragment per line.
    return line_break.join(data)
@dataclass
class SupportDependencies:
    """Dependency container with a single string field, ``db``."""

    # NOTE(review): what `db` identifies (path, URL, name) is not visible in
    # this file, and nothing here instantiates the class — confirm its use.
    db: str
async def ppt_content(data):
    """
    Generate PowerPoint content from extracted text using the AI agent.

    The fragments are joined, split into word-bounded chunks, and sent to the
    agent one chunk at a time. Every follow-up request carries the message
    history of the previous run so the model keeps extending the same
    presentation.

    Args:
        data: Sequence of text fragments (strings) to build the slides from.

    Returns:
        The structured result (``result.data``) of the final agent run. It is
        also printed to stdout.

    Raises:
        ValueError: If ``data`` is empty or contains only whitespace.
    """
    if not data:
        raise ValueError("No valid text found for PowerPoint generation.")

    chunks = split_into_token_chunks("\n".join(data))
    if not chunks:
        # Whitespace-only fragments produce no chunks; report the same error
        # instead of failing with an IndexError on the first chunk.
        raise ValueError("No valid text found for PowerPoint generation.")

    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        # The prompt text is kept flush-left so the string sent to the model
        # is exactly the original one.
        system_prompt="""
You are an expert in creating PowerPoint presentations.
Create 5 slides:
1. Title Slide: Introduction about the presentation.
2. Methodology Slide: Summarize the methodology in detail.
3. Results Slide: Present key findings in bullet points.
4. Discussion Slide: Summarize implications and limitations.
5. Conclusion Slide: State the overall conclusion.
Each slide should have:
- Title: Clear and concise.
- Text: Short and informative explanation.
- Bullet Points: 3-5 summarized key takeaways.
""",
    )

    message_history: list[ModelMessage] = []
    # Await the async API: calling the blocking run_sync() from inside a
    # coroutine re-enters the already-running event loop.
    result = await agent.run(
        user_prompt=f"Create a PowerPoint presentation from {chunks[0]}",
        message_history=message_history,
    )
    for chunk in chunks[1:]:
        result = await agent.run(
            user_prompt=f"Continue creating the PowerPoint presentation from {chunk}",
            message_history=result.all_messages(),
        )

    print(result.data)
    return result.data
def ai_ppt(data):
    """
    Run the PowerPoint generation coroutine to completion on a fresh event loop.

    Args:
        data: Sequence of text fragments, forwarded to ``ppt_content``.

    Returns:
        Whatever ``ppt_content`` returns.

    Raises:
        Any exception raised by ``ppt_content`` propagates to the caller.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(ppt_content(data=data))
    finally:
        # Each call creates its own loop; close it and unset it as the
        # current loop so repeated calls do not leak loops or leave a closed
        # loop installed for this thread.
        asyncio.set_event_loop(None)
        loop.close()
def extract_data(feed):
    """Read the text of every page of the PDF ``feed`` into the global ``data`` list.

    ``data`` is rebound to a fresh list first, so earlier extractions are
    discarded. Pages for which no text is extracted are skipped.
    """
    global data
    data = []  # start from an empty list on every call
    with pdfplumber.open(feed) as document:
        for page in document.pages:
            page_text = page.extract_text()
            if not page_text:
                # Nothing extractable on this page (None or empty string).
                continue
            data.append(page_text)
def main():
    """Render the Streamlit page: PDF upload, slide-generation button, PDF preview."""
    st.title("AI-Powered PowerPoint Generator")
    pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")
    if pdf_upload is None:
        # Nothing uploaded yet: only the title and the uploader are shown.
        return

    # Refresh the module-level `data` list from the uploaded file.
    extract_data(pdf_upload)

    generate_clicked = st.button("Generate PPT")
    if generate_clicked:
        try:
            ai_ppt(data)
            st.success("PowerPoint generation completed!")
        except Exception as e:
            # UI boundary: report the failure on the page instead of crashing.
            st.error(f"Error generating PPT: {e}")

    # Display PDF
    pdf_viewer(input=pdf_upload.getvalue(), width=700)
# Script entry point.
if __name__ == '__main__':
    # Patch asyncio so an event loop can be re-entered before the app starts.
    # NOTE(review): presumably required because the generation path drives an
    # event loop by hand — confirm before removing.
    nest_asyncio.apply()
    main()
|