Spaces:
Runtime error
Runtime error
File size: 2,238 Bytes
358d4fa 25a7813 358d4fa 25a7813 358d4fa 25a7813 358d4fa 25a7813 7dd2d72 25a7813 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import base64
import os
import tempfile
import time

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")


#MODEL AND TOKENIZER
@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the T5 tokenizer and model for ``checkpoint`` once per process.

    Streamlit re-executes this script on user interaction; without caching,
    the tokenizer and model would be loaded again on each run.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    loaded_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, loaded_model


model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
# Same module-level names as before, so the rest of the script is unaffected.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
#FILE LOADER AND PREPROCESSING
def preprocess_pdf(file):
    """Load a PDF and return its text as one concatenated string.

    Args:
        file: Path handed to ``PyPDFLoader``.

    Returns:
        The ``page_content`` of every chunk produced by the text splitter,
        joined in order with no separator. An empty string when the splitter
        yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with chunk_overlap=70, adjacent chunks presumably share
    # text, so the concatenation may repeat those spans -- confirm intended.
    # str.join builds the result in one pass instead of repeated `+`.
    return "".join(text.page_content for text in texts)
#LLM PIPELINE
@st.cache_data
def language_model_pipeline(filepath):
    """Summarize the PDF at ``filepath`` and return the summary text.

    Builds a ``'summarization'`` pipeline from the module-level ``model`` and
    ``model_tokenizer`` (max_length 500, min_length 32), feeds it the text
    returned by ``preprocess_pdf(filepath)``, and returns the
    ``'summary_text'`` field of the first result.

    Args:
        filepath: Path of the PDF to summarize.

    Returns:
        The summary string produced by the pipeline.
    """
    length_bounds = {"max_length": 500, "min_length": 32}
    summarizer = pipeline(
        'summarization',
        model=model,
        tokenizer=model_tokenizer,
        **length_bounds,
    )
    outputs = summarizer(preprocess_pdf(filepath))
    return outputs[0]['summary_text']
# UI flow: upload a PDF, then summarize it on demand.
title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # The loader works on a filesystem path, so persist the upload to a
        # uniquely named temporary file rather than a path derived from the
        # client-supplied file name.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            temp_file.write(uploaded_file.getvalue())
            filepath = temp_file.name
        try:
            # Show progress while the real work runs (previously a fixed
            # sleep followed by a hard-coded placeholder summary).
            with st.spinner("Summarizing..."):
                summarized_result = language_model_pipeline(filepath)
        finally:
            # Always remove the temporary copy, even if summarization fails.
            os.remove(filepath)
        st.info("Summarization Complete")
        st.success(summarized_result)
|