import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

# MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
model_tokenizer = T5Tokenizer.from_pretrained(model_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(model_checkpoint)

# FILE LOADER AND PREPROCESSING
def preprocess_pdf(file):
    # Load the PDF, split it into pages, chunk the pages into overlapping
    # segments, and concatenate the chunk contents into a single string.
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
    texts = text_splitter.split_documents(pages)
    final_text = ""
    for text in texts:
        final_text = final_text + text.page_content
    return final_text

# LLM PIPELINE
@st.cache_data
def language_model_pipeline(filepath):
    # Summarize the preprocessed PDF text with the LaMini-Flan-T5 model.
    summarization_pipeline = pipeline(
        "summarization",
        model=model,
        tokenizer=model_tokenizer,
        max_length=500,
        min_length=32,
    )
    input_text = preprocess_pdf(filepath)
    summary_result = summarization_pipeline(input_text)
    summarized_text = summary_result[0]["summary_text"]
    return summarized_text

st.title("PDF Summarization using LaMini")

uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # Write the upload to a local file so PyPDFLoader can read it from disk,
        # then run the cached summarization pipeline and display the result.
        filepath = uploaded_file.name
        with open(filepath, "wb") as temp_file:
            temp_file.write(uploaded_file.read())
        summarized_result = language_model_pipeline(filepath)
        st.info("Summarization Complete")
        st.success(summarized_result)
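
# Usage note (a minimal sketch, assuming this script is saved as app.py and that
# streamlit, transformers, langchain, pypdf, sentencepiece, and torch are
# installed in the environment):
#
#     streamlit run app.py
#
# The app then serves a page where a PDF can be uploaded and summarized.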