kamau1 committed on
Commit
e6d52ea
·
verified ·
1 Parent(s): e762464

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  from langchain.text_splitter import CharacterTextSplitter
2
  from langchain.embeddings import HuggingFaceBgeEmbeddings
3
  from langchain.vectorstores import FAISS
@@ -51,12 +56,13 @@ def translate(userinput, target_lang, source_lang=None):
51
  translation = result['translated_text']
52
  return source_lange, translation
53
 
54
- def get_pdf_text(pdf_docs : list) -> str:
55
- text = ""
56
- for pdf in pdf_docs:
57
- pdf_reader = PdfReader(pdf)
58
- for page in pdf_reader.pages:
59
- text += page.extract_text()
 
60
  return text
61
 
62
 
@@ -96,13 +102,11 @@ def get_conversation_chain(vectorstore:FAISS) -> ConversationalRetrievalChain:
96
  def main():
97
  st.title("SemaNaPDF📚")
98
  # upload file
99
- pdf_docs = st.file_uploader(
100
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
101
- )
102
- if pdf_docs is not None:
103
  with st.spinner("processing"):
104
  # get pdf text
105
- raw_text = get_pdf_text(pdf_docs)
106
 
107
  # get the text chunks
108
  text_chunks = get_text_chunks(raw_text)
@@ -133,6 +137,16 @@ def main():
133
  st.markdown(response)
134
  st.session_state.messages.append({"role": "assistant", "content": response})
135
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  if __name__ == '__main__':
138
  main()
 
1
+ """
2
+ creator: Lewis Kamau Kimaru
3
+ Function: chat with pdf documents in different languages
4
+
5
+ """
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.embeddings import HuggingFaceBgeEmbeddings
8
  from langchain.vectorstores import FAISS
 
56
  translation = result['translated_text']
57
  return source_lange, translation
58
 
59
def get_pdf_text(pdf: Union[str, bytes, bytearray]) -> str:
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf: The PDF source, passed unchanged to ``PdfReader``. The caller
            in this file hands over the object returned by
            ``st.file_uploader``.

    Returns:
        The extracted text of all pages joined in page order. Pages whose
        ``extract_text()`` result is empty or ``None`` are skipped, so the
        result is ``""`` when no page yields any text.
    """
    reader = PdfReader(pdf)
    # Join the per-page extractions once instead of returning the loop
    # variable: the earlier code returned ``text`` (only the last page's
    # result, possibly None, and unbound for a zero-page document) rather
    # than the accumulated string.
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)
67
 
68
 
 
102
  def main():
103
  st.title("SemaNaPDF📚")
104
  # upload file
105
+ pdf = st.file_uploader("Upload a PDF Document", type="pdf")
106
+ if pdf is not None:
 
 
107
  with st.spinner("processing"):
108
  # get pdf text
109
+ raw_text = get_pdf_text(pdf)
110
 
111
  # get the text chunks
112
  text_chunks = get_text_chunks(raw_text)
 
137
  st.markdown(response)
138
  st.session_state.messages.append({"role": "assistant", "content": response})
139
 
140
+ # Signature
141
+ st.markdown(
142
+ """
143
+ <div style="position: fixed; bottom: 0; right: 0; padding: 10px;">
144
+ <a href="https://kamaukimaru.vercel.app" target="_blank" rel="noopener noreferrer" style="font-size: 12px; color: #808080; text-decoration: none;">©2023 Lewis Kimaru. All rights reserved.</a>
145
+ </div>
146
+ """,
147
+ unsafe_allow_html=True
148
+ )
149
+
150
 
151
  if __name__ == '__main__':
152
  main()