nurindahpratiwi
first commit
7dd2d72
raw
history blame
2.24 kB
import base64
import hashlib
import tempfile
import time
from pathlib import Path

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

# MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the tokenizer and model for *checkpoint* once and reuse them.

    Streamlit re-executes the script on user interaction; without caching,
    the checkpoint would be loaded again on each rerun. ``st.cache_resource``
    returns the same (un-copied) objects on later calls with the same
    argument.

    Args:
        checkpoint: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    seq2seq_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, seq2seq_model


# Module-level names kept as-is: the summarization pipeline below reads them.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
# FILE LOADER AND PREPROCESSING
def preprocess_pdf(file):
    """Load a PDF and return its text as a single concatenated string.

    The document is loaded with ``PyPDFLoader``, split into chunks with
    ``RecursiveCharacterTextSplitter`` and the chunks' ``page_content`` are
    joined back together with no separator.

    Args:
        file: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        The concatenated ``page_content`` of every chunk; an empty string
        when the splitter yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with chunk_overlap=70 adjacent chunks may share text, so
    # concatenating them would repeat that text in the result - confirm this
    # is intended before changing the splitter settings.
    # join() builds the string in one pass instead of repeated `+=` copies.
    return "".join(text.page_content for text in texts)
# LLM PIPELINE
@st.cache_data
def language_model_pipeline(filepath):
    """Summarize the PDF at *filepath* with the module-level model.

    Builds a Hugging Face ``summarization`` pipeline from the global ``model``
    and ``model_tokenizer``, extracts the document text with
    ``preprocess_pdf`` and returns the generated summary. The call is wrapped
    in ``st.cache_data``.

    NOTE(review): the cache is presumably keyed on the ``filepath`` string
    only, so a different file stored under the same path may return an
    earlier summary - verify against the caller.

    Args:
        filepath: Location of the PDF to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    summarizer = pipeline(
        "summarization",
        model=model,
        tokenizer=model_tokenizer,
        max_length=500,
        min_length=32,
    )
    document_text = preprocess_pdf(filepath)
    outputs = summarizer(document_text)
    return outputs[0]["summary_text"]
# PAGE FLOW: upload a PDF, then summarize it on demand.
title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])

if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # getvalue() returns the whole upload regardless of the current read
        # position, so it stays correct across script reruns.
        pdf_bytes = uploaded_file.getvalue()

        # Name the temporary file after a digest of its content instead of
        # the client-supplied filename: the name is then safe to use as a
        # path, and identical uploads map to the same path (so a cached
        # summary can be reused) while different uploads never collide.
        digest = hashlib.sha256(pdf_bytes).hexdigest()
        pdf_path = Path(tempfile.gettempdir()) / f"{digest}.pdf"
        with open(pdf_path, "wb") as temp_file:
            temp_file.write(pdf_bytes)

        try:
            # Run the real model on the uploaded document (previously a fixed
            # delay followed by a hard-coded summary unrelated to the upload).
            summarized_result = language_model_pipeline(str(pdf_path))
        finally:
            # The file is only needed while the text is being extracted.
            pdf_path.unlink(missing_ok=True)

        st.info("Summarization Complete")
        st.success(summarized_result)