Spaces:

tony346
/

Basic_RAG_AI_Chatbot_with_Llama2

Sleeping

App Files Files Community

tony346 committed on Jan 19

Commit

1ff0128

verified ·

1 Parent(s): d55c1c9

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -9

app.py CHANGED Viewed

@@ -22,18 +22,39 @@ def get_pdf_text(pdf_docs):
     pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
     return pdf_doc # 추출한 텍스트를 반환합니다.
-# 과제
-# 아래 텍스트 추출 함수를 작성
 def get_text_file(docs):
-    pass
 def get_csv_file(docs):
-    pass
 def get_json_file(docs):
-    pass
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
@@ -94,7 +115,7 @@ def handle_userinput(user_question):
 def main():
     load_dotenv()
-    st.set_page_config(page_title="Chat with multiple Files",
                        page_icon=":books:")
     st.write(css, unsafe_allow_html=True)
@@ -103,7 +124,7 @@ def main():
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
-    st.header("Chat with multiple Files:")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)

     pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
     return pdf_doc # 추출한 텍스트를 반환합니다.
 def get_text_file(docs):
     """Load an uploaded plain-text file into LangChain documents.

     The upload is written to a scratch directory because ``TextLoader``
     is constructed from a filesystem path.

     Args:
         docs: Uploaded file object exposing ``name`` and ``getvalue()``
             (presumably a Streamlit ``UploadedFile`` -- confirm against
             the caller).

     Returns:
         The result of ``TextLoader(...).load()`` for the uploaded file.
     """
     # The context manager removes the scratch directory deterministically
     # instead of relying on garbage collection of the TemporaryDirectory.
     with tempfile.TemporaryDirectory() as temp_dir:
         # basename() keeps the write inside temp_dir even if the upload
         # name carries path components.
         temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
         with open(temp_filepath, "wb") as f:
             f.write(docs.getvalue())
         # Load before leaving the block, while the file still exists.
         return TextLoader(temp_filepath).load()
 def get_csv_file(docs):
     """Load an uploaded CSV file into LangChain documents.

     The upload is written to a scratch directory because ``CSVLoader``
     is constructed from a filesystem path.

     Args:
         docs: Uploaded file object exposing ``name`` and ``getvalue()``
             (presumably a Streamlit ``UploadedFile`` -- confirm against
             the caller).

     Returns:
         The result of ``CSVLoader(...).load()`` for the uploaded file.
     """
     # The context manager removes the scratch directory deterministically
     # instead of relying on garbage collection of the TemporaryDirectory.
     with tempfile.TemporaryDirectory() as temp_dir:
         # basename() keeps the write inside temp_dir even if the upload
         # name carries path components.
         temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
         with open(temp_filepath, "wb") as f:
             f.write(docs.getvalue())
         # Load before leaving the block, while the file still exists.
         return CSVLoader(temp_filepath).load()
 def get_json_file(docs, jq_schema='.scans[].relationships'):
     """Load an uploaded JSON file into LangChain documents.

     The upload is written to a scratch directory because ``JSONLoader``
     is constructed from a filesystem path.

     Args:
         docs: Uploaded file object exposing ``name`` and ``getvalue()``
             (presumably a Streamlit ``UploadedFile`` -- confirm against
             the caller).
         jq_schema: jq expression handed to ``JSONLoader``. The default
             keeps the expression this function previously hard-coded, so
             existing single-argument calls behave as before.

     Returns:
         The result of ``JSONLoader(...).load()`` for the uploaded file.
     """
     # The context manager removes the scratch directory deterministically
     # instead of relying on garbage collection of the TemporaryDirectory.
     with tempfile.TemporaryDirectory() as temp_dir:
         # basename() keeps the write inside temp_dir even if the upload
         # name carries path components.
         temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
         with open(temp_filepath, "wb") as f:
             f.write(docs.getvalue())
         json_loader = JSONLoader(temp_filepath,
                                  jq_schema=jq_schema,
                                  text_content=False)
         # Load before leaving the block, while the file still exists.
         return json_loader.load()
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
 def main():
     load_dotenv()
+    st.set_page_config(page_title="Chat with multiple PDFs",
                        page_icon=":books:")
     st.write(css, unsafe_allow_html=True)
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
+    st.header("Chat with multiple PDFs :books:")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)