Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import streamlit as st
-import pandas as pd
 from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
 from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
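HuggingFaceEmbedding is imported above but configured outside the hunks shown in this diff. In current llama_index releases the embedding model is usually registered globally through Settings; a minimal sketch, with an illustrative model name that is not taken from this Space:

    from llama_index.core import Settings
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    # Illustrative model name; the Space's actual embedding model is not visible in this diff.
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")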
@@ -32,20 +31,7 @@ os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)

 def data_ingestion():
-    documents = []
-
-    # Load documents from the data directory
-    documents += SimpleDirectoryReader(DATA_DIR).load_data()
-
-    # Process and load CSV files
-    for file in os.listdir(DATA_DIR):
-        if file.endswith(".csv"):
-            csv_path = os.path.join(DATA_DIR, file)
-            df = pd.read_csv(csv_path)
-            # Convert DataFrame to a list of text strings (or any other format suitable for your embeddings)
-            csv_texts = df.apply(lambda row: " ".join(row.astype(str)), axis=1).tolist()
-            documents += csv_texts
-
+    documents = SimpleDirectoryReader(DATA_DIR).load_data()
     storage_context = StorageContext.from_defaults()
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
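The removed CSV branch appended raw strings to documents, while VectorStoreIndex.from_documents expects Document objects, which raw strings do not satisfy. If CSV support were ever reinstated, one way to keep the rows compatible is to wrap each one in a Document; a minimal sketch, illustrative only and not part of this commit:

    import os
    import pandas as pd
    from llama_index.core import Document, SimpleDirectoryReader

    def load_documents(data_dir):
        # PDFs, text files, etc. via the standard reader
        documents = SimpleDirectoryReader(data_dir).load_data()
        for file in os.listdir(data_dir):
            if file.endswith(".csv"):
                df = pd.read_csv(os.path.join(data_dir, file))
                # Wrap each row in a Document so the index accepts it
                rows = df.apply(lambda row: " ".join(row.astype(str)), axis=1)
                documents += [Document(text=row) for row in rows]
        return documents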
@@ -111,7 +97,7 @@ for message in st.session_state.messages:

 with st.sidebar:
     st.title("Menu:")
-    uploaded_file = st.file_uploader("Upload your PDF
+    uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
     video_url = st.text_input("Enter Youtube Video Link: ")
     if st.button("Submit & Process"):
         with st.spinner("Processing..."):
@@ -119,7 +105,7 @@ with st.sidebar:
             remove_old_files()

             if uploaded_file:
-                filepath =
+                filepath = "data/saved_pdf.pdf"
                 with open(filepath, "wb") as f:
                     f.write(uploaded_file.getbuffer())

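Each upload now overwrites the fixed path data/saved_pdf.pdf. If the original filename ever needed to be preserved, Streamlit's UploadedFile exposes it as .name; an illustrative variation, assuming DATA_DIR is the same "data" directory used elsewhere in app.py:

    import os

    if uploaded_file:
        # Keep the user's filename instead of a fixed one (illustrative alternative)
        filepath = os.path.join(DATA_DIR, uploaded_file.name)
        with open(filepath, "wb") as f:
            f.write(uploaded_file.getbuffer())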
@@ -128,10 +114,10 @@ with st.sidebar:
                 with open("data/saved_text.txt", "w") as file:
                     file.write(extracted_text)

-            data_ingestion()  # Process every time new file is uploaded
+            data_ingestion()  # Process PDF every time new file is uploaded
             st.success("Done")

-user_prompt = st.chat_input("Ask me anything about the content of the PDF
+user_prompt = st.chat_input("Ask me anything about the content of the PDF:")

 if user_prompt and (uploaded_file or video_url):
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
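How user_prompt is answered is outside the hunks shown. A typical LlamaIndex flow for this setup would reload the index that data_ingestion() persisted and query it with the imported HuggingFaceInferenceAPI; the sketch below is an assumption about that flow, with a hypothetical helper name, model name, and token handling:

    import os
    from llama_index.core import StorageContext, load_index_from_storage
    from llama_index.llms.huggingface import HuggingFaceInferenceAPI

    def handle_query(question, persist_dir):
        # Reload the index persisted by data_ingestion(); assumes Settings.embed_model
        # is configured as sketched above.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        index = load_index_from_storage(storage_context)
        # Illustrative model and token source; the Space's real LLM setup is not shown.
        llm = HuggingFaceInferenceAPI(
            model_name="HuggingFaceH4/zephyr-7b-beta",
            token=os.getenv("HF_TOKEN"),
        )
        query_engine = index.as_query_engine(llm=llm)
        return str(query_engine.query(question))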