Mr-Vicky-01 committed on
Commit
122726f
·
verified ·
1 Parent(s): 1956d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -4,6 +4,7 @@ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  from llama_index.core import Settings
6
  from youtube_transcript_api import YouTubeTranscriptApi
 
7
  import shutil
8
  import os
9
  import time
@@ -63,6 +64,18 @@ def extract_transcript_details(youtube_video_url):
63
  except Exception as e:
64
  st.error(e)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def handle_query(query):
67
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
68
  index = load_index_from_storage(storage_context)
@@ -109,6 +122,7 @@ for message in st.session_state.messages:
109
  with st.sidebar:
110
  st.title("Menu:")
111
  uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
 
112
  video_url = st.text_input("Enter Youtube Video Link: ")
113
  if st.button("Submit & Process"):
114
  with st.spinner("Processing..."):
@@ -120,10 +134,15 @@ with st.sidebar:
120
  print(filepath)
121
  with open(filepath, "wb") as f:
122
  f.write(uploaded_file.getbuffer())
 
 
 
 
 
123
 
124
  if video_url:
125
  extracted_text = extract_transcript_details(video_url)
126
- with open("data/saved_text.txt", "w") as file:
127
  file.write(extracted_text)
128
 
129
  data_ingestion() # Process PDF every time new file is uploaded
 
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  from llama_index.core import Settings
6
  from youtube_transcript_api import YouTubeTranscriptApi
7
+ from langchain_community.document_loaders import WebBaseLoader
8
  import shutil
9
  import os
10
  import time
 
64
  except Exception as e:
65
  st.error(e)
66
 
67
def get_url_text(url_link):
    """Load a web page with WebBaseLoader and return its text content.

    Args:
        url_link: URL of the page to load.

    Returns:
        The ``page_content`` of every loaded document, concatenated in
        order. If anything fails, the error is shown through ``st.error``
        and an empty string is returned instead of raising.
    """
    try:
        loader = WebBaseLoader(url_link)
        # Throttle setting on the loader (one request per second, going by
        # the attribute name).
        loader.requests_per_second = 1
        docs = loader.aload()
        return "".join(page.page_content for page in docs)
    except Exception as e:
        # UI boundary: surface the problem in the app rather than crashing.
        st.error(e)
        # Always hand back a str: the caller passes the result directly to
        # file.write(), which raises TypeError when given None.
        return ""
78
+
79
  def handle_query(query):
80
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
81
  index = load_index_from_storage(storage_context)
 
122
  with st.sidebar:
123
  st.title("Menu:")
124
  uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
125
+ uploaded_url = st.text_input("Enter the Documentation URL: ")
126
  video_url = st.text_input("Enter Youtube Video Link: ")
127
  if st.button("Submit & Process"):
128
  with st.spinner("Processing..."):
 
134
  print(filepath)
135
  with open(filepath, "wb") as f:
136
  f.write(uploaded_file.getbuffer())
137
+
138
+ if uploaded_url:
139
+ url_text = get_url_text(uploaded_url)
140
+ with open("data/url_text.txt", "w") as file:
141
+ file.write(url_text)
142
 
143
  if video_url:
144
  extracted_text = extract_transcript_details(video_url)
145
+ with open("data/transcript_text.txt", "w") as file:
146
  file.write(extracted_text)
147
 
148
  data_ingestion() # Process PDF every time new file is uploaded