File size: 2,238 Bytes
358d4fa
 
 
 
 
 
 
25a7813
358d4fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25a7813
358d4fa
25a7813
 
 
 
 
 
358d4fa
25a7813
 
 
 
7dd2d72
25a7813
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import base64
import os
import tempfile
import time

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

# Banner image rendered at the top of the page.
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

#MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the T5 tokenizer and model for ``checkpoint`` once per process.

    Streamlit re-executes this script on every widget interaction; without
    caching, the weights would be reloaded from the checkpoint on each rerun.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    seq2seq_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, seq2seq_model


# Keep the original module-level names so the rest of the file is unaffected.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)

#FILE LOADER AND PREPROCESSING
def preprocess_pdf(file, chunk_size=170, chunk_overlap=70):
    """Load a PDF and return the text of its chunks as a single string.

    Args:
        file: Path of the PDF, handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The ``page_content`` of every chunk, concatenated in order with no
        separator. An empty string if the splitter yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap adjacent chunks may share
    # text, so this concatenation can repeat passages -- confirm that is
    # intended for the summarizer input.
    return "".join(text.page_content for text in texts)

@st.cache_data
def _summarize_text(input_text):
    """Summarize ``input_text`` with the module-level model and tokenizer.

    The cache is keyed on the text itself, so two different documents can
    never share a cached summary even when they were saved under the same
    file path.
    """
    summarization_pipeline = pipeline(
        'summarization',
        model=model,
        tokenizer=model_tokenizer,
        max_length=500,
        min_length=32,
    )
    summary_result = summarization_pipeline(input_text)
    return summary_result[0]['summary_text']


#LLM PIPELINE
def language_model_pipeline(filepath):
    """Return a summary of the PDF stored at ``filepath``.

    The PDF is re-read on every call and only the summarization step is
    cached; caching on ``filepath`` alone would return a stale summary when a
    different file is written to the same path.

    Args:
        filepath: Path of the PDF to summarize.

    Returns:
        The ``summary_text`` of the first result produced by the pipeline.
    """
    input_text = preprocess_pdf(filepath)
    return _summarize_text(input_text)

title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # The PDF loader reads from a path, so persist the upload to a
        # temporary file instead of writing a user-supplied file name into
        # the working directory.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            temp_file.write(uploaded_file.getvalue())
            filepath = temp_file.name
        try:
            # Summarize the uploaded document itself rather than showing a
            # fixed placeholder text after an artificial delay.
            summarized_result = language_model_pipeline(filepath)
        finally:
            # Always remove the temporary copy, even if summarization fails.
            os.remove(filepath)
        st.info("Summarization Complete")
        st.success(summarized_result)