Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import streamlit as st
-import pandas as pd
 from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
 from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
@@ -32,20 +31,7 @@ os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 
 def data_ingestion():
-    documents = []
-
-    # Load documents from the data directory
-    documents += SimpleDirectoryReader(DATA_DIR).load_data()
-
-    # Process and load CSV files
-    for file in os.listdir(DATA_DIR):
-        if file.endswith(".csv"):
-            csv_path = os.path.join(DATA_DIR, file)
-            df = pd.read_csv(csv_path)
-            # Convert DataFrame to a list of text strings (or any other format suitable for your embeddings)
-            csv_texts = df.apply(lambda row: " ".join(row.astype(str)), axis=1).tolist()
-            documents += csv_texts
-
+    documents = SimpleDirectoryReader(DATA_DIR).load_data()
     storage_context = StorageContext.from_defaults()
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
@@ -111,7 +97,7 @@ for message in st.session_state.messages:
 
 with st.sidebar:
     st.title("Menu:")
-    uploaded_file = st.file_uploader("Upload your PDF
+    uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
     video_url = st.text_input("Enter Youtube Video Link: ")
     if st.button("Submit & Process"):
         with st.spinner("Processing..."):
@@ -119,7 +105,7 @@ with st.sidebar:
             remove_old_files()
 
             if uploaded_file:
-                filepath =
+                filepath = "data/saved_pdf.pdf"
                 with open(filepath, "wb") as f:
                     f.write(uploaded_file.getbuffer())
 
@@ -128,10 +114,10 @@ with st.sidebar:
                 with open("data/saved_text.txt", "w") as file:
                     file.write(extracted_text)
 
-            data_ingestion()  # Process every time new file is uploaded
+            data_ingestion()  # Process PDF every time new file is uploaded
             st.success("Done")
 
-user_prompt = st.chat_input("Ask me anything about the content of the PDF
+user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 
 if user_prompt and (uploaded_file or video_url):
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
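
Note on the removed CSV branch: VectorStoreIndex.from_documents expects llama_index Document objects, while the deleted loop appended plain row strings to documents, so that path would not have indexed cleanly. If CSV ingestion is ever reinstated, a minimal sketch is shown below; it is not part of this commit, and the data_ingestion_with_csv name, DATA_DIR value, and metadata field are illustrative assumptions.

# Sketch only: CSV rows wrapped as Document objects before indexing.
import os

import pandas as pd
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex

DATA_DIR = "data"  # assumed to match the DATA_DIR used in app.py

def data_ingestion_with_csv():  # hypothetical helper, not defined in app.py
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    for file in os.listdir(DATA_DIR):
        if file.endswith(".csv"):
            df = pd.read_csv(os.path.join(DATA_DIR, file))
            # One Document per row; the row's values are joined into a single text string.
            documents += [
                Document(text=" ".join(row.astype(str)), metadata={"source": file})
                for _, row in df.iterrows()
            ]
    return VectorStoreIndex.from_documents(documents)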
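For reference, load_index_from_storage is still imported at the top of app.py, which suggests the index persisted by data_ingestion() is read back elsewhere in the script. A minimal sketch of that round trip, assuming a PERSIST_DIR value like the one app.py defines (the "db" path here is a placeholder):

# Sketch only: reload the persisted index and query it.
from llama_index.core import StorageContext, load_index_from_storage

PERSIST_DIR = "db"  # placeholder — use the value app.py actually sets

storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)

# as_query_engine() uses whichever LLM/embedding model is configured
# (assumed to be the HuggingFace components imported in app.py).
query_engine = index.as_query_engine()
print(query_engine.query("What is the uploaded document about?"))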