Spaces:

Mr-Vicky-01
/

chat-with-PDF

Running

App Files Files Community

Mr-Vicky-01 committed on Aug 5, 2024

Commit

122726f

verified ·

1 Parent(s): 1956d28

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -1

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
 from youtube_transcript_api import YouTubeTranscriptApi
 import shutil
 import os
 import time
@@ -63,6 +64,18 @@ def extract_transcript_details(youtube_video_url):
     except Exception as e:
         st.error(e)
 def handle_query(query):
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
     index = load_index_from_storage(storage_context)
@@ -109,6 +122,7 @@ for message in st.session_state.messages:
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
     video_url = st.text_input("Enter Youtube Video Link: ")
     if st.button("Submit & Process"):
         with st.spinner("Processing..."):
@@ -120,10 +134,15 @@ with st.sidebar:
                 print(filepath)
                 with open(filepath, "wb") as f:
                     f.write(uploaded_file.getbuffer())
             if video_url:
                 extracted_text = extract_transcript_details(video_url)
-                with open("data/saved_text.txt", "w") as file:
                     file.write(extracted_text)
             data_ingestion()  # Process PDF every time new file is uploaded

 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
 from youtube_transcript_api import YouTubeTranscriptApi
+from langchain_community.document_loaders import WebBaseLoader
 import shutil
 import os
 import time
     except Exception as e:
         st.error(e)
+def get_url_text(url_link):
+    """Load the content at ``url_link`` and return it as one string.
+
+    Args:
+        url_link: URL handed to ``WebBaseLoader``.
+
+    Returns:
+        The ``page_content`` of every loaded document, concatenated in
+        order. An empty string is returned when loading fails; the
+        exception is shown to the user through ``st.error``.
+    """
+    try:
+        loader = WebBaseLoader(url_link)
+        # NOTE(review): presumably throttles fetching to one request per
+        # second -- confirm against the loader's documentation.
+        loader.requests_per_second = 1
+        docs = loader.aload()
+        return "".join(page.page_content for page in docs)
+    except Exception as e:
+        st.error(e)
+        # Return a string rather than falling through to None: the caller
+        # writes the result straight to a text file, and file.write(None)
+        # would raise TypeError.
+        return ""
 def handle_query(query):
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
     index = load_index_from_storage(storage_context)
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
+    uploaded_url = st.text_input("Enter the Documentation URL: ")
     video_url = st.text_input("Enter Youtube Video Link: ")
     if st.button("Submit & Process"):
         with st.spinner("Processing..."):
                 print(filepath)
                 with open(filepath, "wb") as f:
                     f.write(uploaded_file.getbuffer())
+            if uploaded_url:
+                url_text = get_url_text(uploaded_url)
+                with open("data/url_text.txt", "w") as file:
+                    file.write(url_text)
             if video_url:
                 extracted_text = extract_transcript_details(video_url)
+                with open("data/transcript_text.txt", "w") as file:
                     file.write(extracted_text)
             data_ingestion()  # Process PDF every time new file is uploaded