Soumen committed on
Commit
9b4a3a4
·
1 Parent(s): bff85ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -106
app.py CHANGED
@@ -83,9 +83,11 @@ def bansum(text):
83
  text_output = out[0]["summary_text"]
84
  st.success(text_output)
85
 
86
- @st.cache
87
- def save(l):
88
- return l
 
 
89
  #@st.cache
90
  def main():
91
  import streamlit as st
@@ -93,110 +95,111 @@ def main():
93
  st.session_state["photo"]="not done"
94
  def change_photo_state():
95
  st.session_state["photo"]="done"
96
- with st.container():
97
- c1, c2, c3 = st.columns([2,2,1])
98
- message = c1.text_input("Type your text here!")
99
- if c2.button("CaptureImage"):
100
- camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
101
- if c2.button("Stop camera"):
102
- CaptureImage =False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
105
- if st.session_state["photo"]=="done" or message:
106
- if uploaded_photo and uploaded_photo.type=='application/pdf':
107
- tet = read_pdf(uploaded_photo)
108
- # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
109
- # temp_file.write(uploaded_photo.read())
110
- # temp_file_path = temp_file.name
111
-
112
- # loader = PyPDFLoader(temp_file_path)
113
- # if loader:
114
- # text.extend(loader.load())
115
- # os.remove(temp_file_path)
116
- # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
117
- # text_chunks = text_splitter.split_documents(text)
118
- values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
119
- text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
120
- #st.success(type(text_chunks))
121
- if st.button("English Pdf Summarize"):
122
- st.subheader("Selected text for summarize: ")
123
- st.success(text)
124
- st.subheader("Summarized Text: ")
125
- engsum(text)
126
-
127
- elif uploaded_photo and uploaded_photo.type !='application/pdf':
128
- text=None
129
- img = Image.open(uploaded_photo)
130
- img = img.save("img.png")
131
- img = cv2.imread("img.png")
132
- st.text("Select the summarization type:")
133
- c4, c5 = st.columns([1,1])
134
- if c4.button("BENGALI"):
135
- text = pytesseract.image_to_string(img, lang="ben")
136
- st.subheader("সারাংশ/সারমর্ম")
137
- bansum(text)
138
- if c5.button("ENGLISH"):
139
- text=pytesseract.image_to_string(img)
140
- st.subheader("Summarized Text")
141
- engsum(text)
142
- #st.success(text)
143
- elif camera_photo:
144
- text=None
145
- img = Image.open(camera_photo)
146
- img = img.save("img.png")
147
- img = cv2.imread("img.png")
148
- #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
149
- st.text("Select the summarization type:")
150
- c6, c7 = st.columns([1,1])
151
- if c6.button("Bangla"):
152
- text = pytesseract.image_to_string(img, lang="ben")
153
- st.subheader("সারাংশ/সারমর্ম")
154
- bansum(text)
155
- if c7.button("English"):
156
- text=pytesseract.image_to_string(img)
157
- st.subheader("Summarized Text")
158
- engsum(text)
159
- else:
160
- text=None
161
- text = message
162
- c8, c9 = st.columns([1,1])
163
- if c8.button("Bangla"):
164
- bansum(text)
165
- if c9.button("English"):
166
- engsum(text)
167
-
168
- with st.container():
169
- from streamlit_chat import message as st_message
170
- from transformers import BlenderbotTokenizer
171
- from transformers import BlenderbotForConditionalGeneration
172
- st.title("Chatbot!!!")
173
-
174
- @st.experimental_singleton
175
- def get_models():
176
- # it may be necessary for other frameworks to cache the model
177
- # seems pytorch keeps an internal state of the conversation
178
- model_name = "facebook/blenderbot-400M-distill"
179
- tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
180
- model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
181
- return tokenizer, model
182
- if "history" not in st.session_state:
183
- st.session_state.history = []
184
- def generate_answer():
185
- tokenizer, model = get_models()
186
- user_message = st.session_state.input_text
187
- inputs = tokenizer(st.session_state.input_text, return_tensors="pt")
188
- result = model.generate(**inputs)
189
- message_bot = tokenizer.decode(
190
- result[0], skip_special_tokens=True
191
- ) # .replace("<s>", "").replace("</s>", "")
192
- st.session_state.history.append({"message": user_message, "is_user": True})
193
- st.session_state.history.append({"message": message_bot, "is_user": False})
194
- st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
195
- from copyreg import clear_extension_cache
196
- for chat in st.session_state.history:
197
- st_message(**chat)
198
- if st.button("Refresh/New Chat"):
199
- st.session_state.history = []
200
 
201
 
202
 
 
83
  text_output = out[0]["summary_text"]
84
  st.success(text_output)
85
 
86
+ @st.experimental_singleton
87
+ def save():
88
+ camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
89
+ return camera_photo
90
+
91
  #@st.cache
92
  def main():
93
  import streamlit as st
 
95
  st.session_state["photo"]="not done"
96
  def change_photo_state():
97
  st.session_state["photo"]="done"
98
+ if st.checkbox("Summarize from text/images/pdfs"):
99
+ with st.container():
100
+ c1, c2, c3 = st.columns([2,2,1])
101
+ message = c1.text_input("Type your text here!")
102
+ if c2.button("CaptureImage"):
103
+ camera_photo=save()
104
+ if c2.button("Stop camera"):
105
+ CaptureImage =False
106
+
107
+ uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
108
+ if st.session_state["photo"]=="done" or message:
109
+ if uploaded_photo and uploaded_photo.type=='application/pdf':
110
+ tet = read_pdf(uploaded_photo)
111
+ # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
112
+ # temp_file.write(uploaded_photo.read())
113
+ # temp_file_path = temp_file.name
114
+
115
+ # loader = PyPDFLoader(temp_file_path)
116
+ # if loader:
117
+ # text.extend(loader.load())
118
+ # os.remove(temp_file_path)
119
+ # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
120
+ # text_chunks = text_splitter.split_documents(text)
121
+ values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
122
+ text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
123
+ #st.success(type(text_chunks))
124
+ if st.button("English Pdf Summarize"):
125
+ st.subheader("Selected text for summarize: ")
126
+ st.success(text)
127
+ st.subheader("Summarized Text: ")
128
+ engsum(text)
129
+
130
+ elif uploaded_photo and uploaded_photo.type !='application/pdf':
131
+ text=None
132
+ img = Image.open(uploaded_photo)
133
+ img = img.save("img.png")
134
+ img = cv2.imread("img.png")
135
+ st.text("Select the summarization type:")
136
+ c4, c5 = st.columns([1,1])
137
+ if c4.button("BENGALI"):
138
+ text = pytesseract.image_to_string(img, lang="ben")
139
+ st.subheader("সারাংশ/সারমর্ম")
140
+ bansum(text)
141
+ if c5.button("ENGLISH"):
142
+ text=pytesseract.image_to_string(img)
143
+ st.subheader("Summarized Text")
144
+ engsum(text)
145
+ #st.success(text)
146
+ elif camera_photo:
147
+ text=None
148
+ img = Image.open(camera_photo)
149
+ img = img.save("img.png")
150
+ img = cv2.imread("img.png")
151
+ #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
152
+ st.text("Select the summarization type:")
153
+ c6, c7 = st.columns([1,1])
154
+ if c6.button("Bangla"):
155
+ text = pytesseract.image_to_string(img, lang="ben")
156
+ st.subheader("সারাংশ/সারমর্ম")
157
+ bansum(text)
158
+ if c7.button("English"):
159
+ text=pytesseract.image_to_string(img)
160
+ st.subheader("Summarized Text")
161
+ engsum(text)
162
+ else:
163
+ text=None
164
+ text = message
165
+ c8, c9 = st.columns([1,1])
166
+ if c8.button("Bangla"):
167
+ bansum(text)
168
+ if c9.button("English"):
169
+ engsum(text)
170
+ if st.checkbox("Conversate"):
171
+ with st.container():
172
+ from streamlit_chat import message as st_message
173
+ from transformers import BlenderbotTokenizer
174
+ from transformers import BlenderbotForConditionalGeneration
175
+ st.title("Chatbot!!!")
176
 
177
+ @st.experimental_singleton
178
+ def get_models():
179
+ # it may be necessary for other frameworks to cache the model
180
+ # seems pytorch keeps an internal state of the conversation
181
+ model_name = "facebook/blenderbot-400M-distill"
182
+ tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
183
+ model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
184
+ return tokenizer, model
185
+ if "history" not in st.session_state:
186
+ st.session_state.history = []
187
+ def generate_answer():
188
+ tokenizer, model = get_models()
189
+ user_message = st.session_state.input_text
190
+ inputs = tokenizer(st.session_state.input_text, return_tensors="pt")
191
+ result = model.generate(**inputs)
192
+ message_bot = tokenizer.decode(
193
+ result[0], skip_special_tokens=True
194
+ ) # .replace("<s>", "").replace("</s>", "")
195
+ st.session_state.history.append({"message": user_message, "is_user": True})
196
+ st.session_state.history.append({"message": message_bot, "is_user": False})
197
+ st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
198
+ from copyreg import clear_extension_cache
199
+ for chat in st.session_state.history:
200
+ st_message(**chat)
201
+ if st.button("Refresh/New Chat"):
202
+ st.session_state.history = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
 
205