Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Sleeping

App Files Files Community

Soumen committed on Sep 14, 2023

Commit

9b4a3a4

1 Parent(s): bff85ef

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -106

app.py CHANGED Viewed

@@ -83,9 +83,11 @@ def bansum(text):
        text_output = out[0]["summary_text"]
        st.success(text_output)
-@st.cache
-def save(l):
-    return l
 #@st.cache
 def main():
     import streamlit as st
@@ -93,110 +95,111 @@ def main():
         st.session_state["photo"]="not done"
     def change_photo_state():
         st.session_state["photo"]="done"
-    with st.container():
-        c1, c2, c3 = st.columns([2,2,1])
-        message = c1.text_input("Type your text here!")
-        if c2.button("CaptureImage"):
-            camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
-        if c2.button("Stop camera"):
-            CaptureImage =False
-        uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
-        if st.session_state["photo"]=="done" or message:
-            if uploaded_photo and uploaded_photo.type=='application/pdf':
-                tet = read_pdf(uploaded_photo)
-                # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-                #     temp_file.write(uploaded_photo.read())
-                #     temp_file_path = temp_file.name
-                # loader = PyPDFLoader(temp_file_path)
-                # if loader:
-                #     text.extend(loader.load())
-                #     os.remove(temp_file_path)
-                # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
-                # text_chunks = text_splitter.split_documents(text)
-                values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
-                text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
-                #st.success(type(text_chunks))
-                if st.button("English Pdf Summarize"):
-                    st.subheader("Selected text for summarize: ")
-                    st.success(text)
-                    st.subheader("Summarized Text: ")
-                    engsum(text)
-            elif uploaded_photo and uploaded_photo.type !='application/pdf':
-                text=None
-                img = Image.open(uploaded_photo)
-                img = img.save("img.png")
-                img = cv2.imread("img.png")
-                st.text("Select the summarization type:")
-                c4, c5 = st.columns([1,1])
-                if c4.button("BENGALI"):
-                    text =  pytesseract.image_to_string(img, lang="ben")
-                    st.subheader("সারাংশ/সারমর্ম")
-                    bansum(text)
-                if c5.button("ENGLISH"):
-                    text=pytesseract.image_to_string(img)
-                    st.subheader("Summarized Text")
-                    engsum(text)
-                #st.success(text)
-            elif camera_photo:
-                text=None
-                img = Image.open(camera_photo)
-                img = img.save("img.png")
-                img = cv2.imread("img.png")
-                #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
-                st.text("Select the summarization type:")
-                c6, c7 = st.columns([1,1])
-                if c6.button("Bangla"):
-                    text =  pytesseract.image_to_string(img, lang="ben")
-                    st.subheader("সারাংশ/সারমর্ম")
-                    bansum(text)
-                if c7.button("English"):
-                    text=pytesseract.image_to_string(img)
-                    st.subheader("Summarized Text")
-                    engsum(text)
-            else:
-                text=None
-                text = message
-                c8, c9 = st.columns([1,1])
-                if c8.button("Bangla"):
-                    bansum(text)
-                if c9.button("English"):
-                    engsum(text)
-    with st.container():
-        from streamlit_chat import message as st_message
-        from transformers import BlenderbotTokenizer
-        from transformers import BlenderbotForConditionalGeneration
-        st.title("Chatbot!!!")
-        @st.experimental_singleton
-        def get_models():
-            # it may be necessary for other frameworks to cache the model
-            # seems pytorch keeps an internal state of the conversation
-            model_name = "facebook/blenderbot-400M-distill"
-            tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
-            model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
-            return tokenizer, model
-        if "history" not in st.session_state:
-            st.session_state.history = []
-        def generate_answer():
-            tokenizer, model = get_models()
-            user_message = st.session_state.input_text
-            inputs = tokenizer(st.session_state.input_text, return_tensors="pt")
-            result = model.generate(**inputs)
-            message_bot = tokenizer.decode(
-                result[0], skip_special_tokens=True
-            )  # .replace("<s>", "").replace("</s>", "")
-            st.session_state.history.append({"message": user_message, "is_user": True})
-            st.session_state.history.append({"message": message_bot, "is_user": False})
-        st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
-        from copyreg import clear_extension_cache
-        for chat in st.session_state.history:
-            st_message(**chat)
-        if st.button("Refresh/New Chat"):
-           st.session_state.history = []

        text_output = out[0]["summary_text"]
        st.success(text_output)
+@st.experimental_singleton
+def save():
+    camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
+    return camera_photo
 #@st.cache
 def main():
     import streamlit as st
         st.session_state["photo"]="not done"
     def change_photo_state():
         st.session_state["photo"]="done"
+    if st.checkbox("Summarize from text/images/pdfs"):
+        with st.container():
+            c1, c2, c3 = st.columns([2,2,1])
+            message = c1.text_input("Type your text here!")
+            if c2.button("CaptureImage"):
+                camera_photo=save()
+            if c2.button("Stop camera"):
+                CaptureImage =False
+            uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
+            if st.session_state["photo"]=="done" or message:
+                if uploaded_photo and uploaded_photo.type=='application/pdf':
+                    tet = read_pdf(uploaded_photo)
+                    # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                    #     temp_file.write(uploaded_photo.read())
+                    #     temp_file_path = temp_file.name
+                    # loader = PyPDFLoader(temp_file_path)
+                    # if loader:
+                    #     text.extend(loader.load())
+                    #     os.remove(temp_file_path)
+                    # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
+                    # text_chunks = text_splitter.split_documents(text)
+                    values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
+                    text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
+                    #st.success(type(text_chunks))
+                    if st.button("English Pdf Summarize"):
+                        st.subheader("Selected text for summarize: ")
+                        st.success(text)
+                        st.subheader("Summarized Text: ")
+                        engsum(text)
+                elif uploaded_photo and uploaded_photo.type !='application/pdf':
+                    text=None
+                    img = Image.open(uploaded_photo)
+                    img = img.save("img.png")
+                    img = cv2.imread("img.png")
+                    st.text("Select the summarization type:")
+                    c4, c5 = st.columns([1,1])
+                    if c4.button("BENGALI"):
+                        text =  pytesseract.image_to_string(img, lang="ben")
+                        st.subheader("সারাংশ/সারমর্ম")
+                        bansum(text)
+                    if c5.button("ENGLISH"):
+                        text=pytesseract.image_to_string(img)
+                        st.subheader("Summarized Text")
+                        engsum(text)
+                    #st.success(text)
+                elif camera_photo:
+                    text=None
+                    img = Image.open(camera_photo)
+                    img = img.save("img.png")
+                    img = cv2.imread("img.png")
+                    #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+                    st.text("Select the summarization type:")
+                    c6, c7 = st.columns([1,1])
+                    if c6.button("Bangla"):
+                        text =  pytesseract.image_to_string(img, lang="ben")
+                        st.subheader("সারাংশ/সারমর্ম")
+                        bansum(text)
+                    if c7.button("English"):
+                        text=pytesseract.image_to_string(img)
+                        st.subheader("Summarized Text")
+                        engsum(text)
+                else:
+                    text=None
+                    text = message
+                    c8, c9 = st.columns([1,1])
+                    if c8.button("Bangla"):
+                        bansum(text)
+                    if c9.button("English"):
+                        engsum(text)
+    if st.checkbox("Conversate"):
+        with st.container():
+            from streamlit_chat import message as st_message
+            from transformers import BlenderbotTokenizer
+            from transformers import BlenderbotForConditionalGeneration
+            st.title("Chatbot!!!")
+            @st.experimental_singleton
+            def get_models():
+                # it may be necessary for other frameworks to cache the model
+                # seems pytorch keeps an internal state of the conversation
+                model_name = "facebook/blenderbot-400M-distill"
+                tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
+                model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
+                return tokenizer, model
+            if "history" not in st.session_state:
+                st.session_state.history = []
+            def generate_answer():
+                tokenizer, model = get_models()
+                user_message = st.session_state.input_text
+                inputs = tokenizer(st.session_state.input_text, return_tensors="pt")
+                result = model.generate(**inputs)
+                message_bot = tokenizer.decode(
+                    result[0], skip_special_tokens=True
+                )  # .replace("<s>", "").replace("</s>", "")
+                st.session_state.history.append({"message": user_message, "is_user": True})
+                st.session_state.history.append({"message": message_bot, "is_user": False})
+            st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
+            from copyreg import clear_extension_cache
+            for chat in st.session_state.history:
+                st_message(**chat)
+            if st.button("Refresh/New Chat"):
+               st.session_state.history = []