Ahtishamafzaal committed
Commit 84faacb · 1 Parent(s): 0c1d861

Space DocumentGPT is created

Files changed (3)
  1. .streamlit/secrets.toml +1 -0
  2. app.py +139 -0
  3. requirements.txt +0 -0
.streamlit/secrets.toml ADDED
@@ -0,0 +1 @@
+OPENAI_API_KEY = "YOUR OPENAI_API_KEY"
app.py ADDED
@@ -0,0 +1,139 @@
+import streamlit as st
+import os
+from PyPDF2 import PdfReader
+import docx
+from langchain.chat_models import ChatOpenAI
+from dotenv import load_dotenv
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from streamlit_chat import message
+from langchain.callbacks import get_openai_callback
+
+def main():
+    load_dotenv()
+    st.set_page_config(page_title="DocumentGPT", page_icon=":books:")
+    st.header(":books: CHAT WITH YOUR DOCUMENTS")
+
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+    if "processComplete" not in st.session_state:
+        st.session_state.processComplete = None
+
+    with st.sidebar:
+        uploaded_files = st.file_uploader("**:books: Upload your files**", accept_multiple_files=True)
+        openai_api_key = st.text_input("**:key: OpenAI API Key**", type="password")
+        process = st.button("**Process**")
+    if process:
+        if not openai_api_key:
+            st.info("Please add your OpenAI API key to continue.")
+            st.stop()
+        with st.spinner("Processing"):
+            files_text = get_files_text(uploaded_files)
+            # get text chunks
+            text_chunks = get_text_chunks(files_text)
+            # create vector store
+            vectorstore = get_vectorstore(text_chunks)
+
+            st.sidebar.info('Processing Complete', icon="✅")
+            # create conversation chain (uses the OpenAI chat model)
+            st.session_state.conversation = get_conversation_chain(vectorstore, openai_api_key)
+
+            st.session_state.processComplete = True
+
+    if st.session_state.processComplete:
+        user_question = st.chat_input("Ask a question about your files.")
+        if user_question:
+            handle_userinput(user_question)
+
+# Read the uploaded files and return their combined text.
+def get_files_text(uploaded_files):
+    text = ""
+    for uploaded_file in uploaded_files:
+        split_tup = os.path.splitext(uploaded_file.name)
+        file_extension = split_tup[1]
+        if file_extension == ".pdf":
+            text += get_pdf_text(uploaded_file)
+        elif file_extension == ".docx":
+            text += get_docx_text(uploaded_file)
+        else:
+            text += get_csv_text(uploaded_file)
+    return text
+
+# Read PDF files
+def get_pdf_text(pdf):
+    pdf_reader = PdfReader(pdf)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+
+def get_docx_text(file):
+    doc = docx.Document(file)
+    allText = []
+    for docpara in doc.paragraphs:
+        allText.append(docpara.text)
+    text = ' '.join(allText)
+    return text
+
+def get_csv_text(file):
+    # Placeholder: CSV parsing is not implemented yet.
+    return "a"
+
+def get_text_chunks(text):
+    # split the text into chunks
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=900,
+        chunk_overlap=100,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+
+
+def get_vectorstore(text_chunks):
+    # use a Hugging Face embedding model
+    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    # create the vector store with FAISS (Facebook AI Similarity Search)
+    knowledge_base = FAISS.from_texts(text_chunks, embeddings)
+    return knowledge_base
+
+def get_conversation_chain(vectorstore, openai_api_key):
+    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name='gpt-3.5-turbo', temperature=0)
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
+
+
+def handle_userinput(user_question):
+    with get_openai_callback() as cb:
+        response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+
+    # Layout of input/response containers
+    response_container = st.container()
+
+    with response_container:
+        for i, messages in enumerate(st.session_state.chat_history):
+            if i % 2 == 0:
+                message(messages.content, is_user=True, key=str(i))
+            else:
+                message(messages.content, key=str(i))
+
+
+if __name__ == '__main__':
+    main()
requirements.txt ADDED
Binary file (3.48 kB).