Upload 3 files

- .env (+3, -0)
- app.py (+172, -0)
- requirements.txt (+0, -0)
.env
ADDED
@@ -0,0 +1,3 @@
GROQ_API_KEY="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"
LANGCHAIN_API_KEY="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"
# OPENAI_API_KEY=""
app.py
ADDED
@@ -0,0 +1,172 @@
import os
import validators
import streamlit as st
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.summarize import load_summarize_chain
from langchain.schema import Document
import yt_dlp
import requests
from bs4 import BeautifulSoup
import re

# Load environment variables
load_dotenv()

# Streamlit App
st.set_page_config(page_title="LangChain: Summarize Text From YT or Website", page_icon="🦜")
st.title("🦜 LangChain: Summarize Text From YT or Website")
st.subheader("Summarize URL")

# Get API Key & URL input
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ API Key not found. Please check your environment variables.")
    st.stop()  # halt here; ChatGroq below would raise without a key

generic_url = st.text_input("Enter YouTube or Website URL", label_visibility="collapsed")

# LangChain Model with Groq API
llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)

# Prompt Template
prompt_template = """
Provide a clear and concise summary in 300 words of the following content:

{text}

Focus on the main points and key insights. Write in a professional tone.
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

def get_youtube_content(url):
    """Get content from a YouTube video using yt-dlp."""
    try:
        ydl_opts = {
            'format': 'worst',
            'extract_flat': True,
            'quiet': True,
            'no_warnings': True
        }
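        # Note: with these options only metadata is extracted and nothing is
        # downloaded, so the summary below is built from the video's title and
        # description rather than a transcript.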
51 |
+
|
52 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
53 |
+
info = ydl.extract_info(url, download=False)
|
54 |
+
|
55 |
+
title = info.get('title', '')
|
56 |
+
description = info.get('description', '')
|
57 |
+
views = info.get('view_count', 'Unknown')
|
58 |
+
uploader = info.get('uploader', 'Unknown')
|
59 |
+
upload_date = info.get('upload_date', 'Unknown')
|
60 |
+
|
61 |
+
content = f"""
|
62 |
+
Video Title: {title}
|
63 |
+
Uploader: {uploader}
|
64 |
+
Upload Date: {upload_date}
|
65 |
+
Views: {views}
|
66 |
+
|
67 |
+
Description:
|
68 |
+
{description}
|
69 |
+
"""
|
70 |
+
return [Document(page_content=content)]
|
71 |
+
|
72 |
+
except Exception as e:
|
73 |
+
st.error(f"Error getting YouTube content: {str(e)}")
|
74 |
+
return None
|
75 |
+
|
76 |
+
def get_website_content(url):
|
77 |
+
"""Get content from website using requests and BeautifulSoup"""
|
78 |
+
try:
|
79 |
+
# Send request with headers to mimic a browser
|
80 |
+
headers = {
|
81 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
82 |
+
}
|
83 |
+
response = requests.get(url, headers=headers, verify=False)
|
84 |
+
response.raise_for_status()
|
85 |
+
|
86 |
+
# Parse HTML
|
87 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
88 |
+
|
89 |
+
# Remove script and style elements
|
90 |
+
for script in soup(["script", "style"]):
|
91 |
+
script.decompose()
|
92 |
+
|
93 |
+
# Get title
|
94 |
+
title = soup.title.string if soup.title else "No title found"
|
95 |
+
|
96 |
+
# Get main content (adjust selectors based on the website structure)
|
97 |
+
main_content = ""
|
98 |
+
|
99 |
+
# Try to find article content first
|
100 |
+
article = soup.find('article')
|
101 |
+
if article:
|
102 |
+
main_content = article.get_text()
|
103 |
+
else:
|
104 |
+
# If no article tag, try common content containers
|
105 |
+
content_tags = ['main', 'div.content', 'div.post-content', 'div.article-content']
|
106 |
+
for tag in content_tags:
|
107 |
+
element = soup.select_one(tag)
|
108 |
+
if element:
|
109 |
+
main_content = element.get_text()
|
110 |
+
break
|
111 |
+
|
112 |
+
# If still no content, get all paragraph text
|
113 |
+
if not main_content:
|
114 |
+
paragraphs = soup.find_all('p')
|
115 |
+
main_content = '\n'.join(p.get_text() for p in paragraphs)
|
116 |
+
|
117 |
+
# Clean up the text
|
118 |
+
# Remove extra whitespace and newlines
|
119 |
+
main_content = re.sub(r'\s+', ' ', main_content).strip()
|
120 |
+
# Remove any remaining HTML tags
|
121 |
+
main_content = re.sub(r'<[^>]+>', '', main_content)
|
122 |
+
|
123 |
+
content = f"""
|
124 |
+
Title: {title}
|
125 |
+
URL: {url}
|
126 |
+
|
127 |
+
Content:
|
128 |
+
{main_content}
|
129 |
+
"""
|
130 |
+
return [Document(page_content=content)]
|
131 |
+
|
132 |
+
except Exception as e:
|
133 |
+
st.error(f"Error fetching or processing {url}, exception:\n{str(e)}")
|
134 |
+
return None
|
135 |
+
|
136 |
+
if st.button("Summarize the Content from YT or Website"):
|
137 |
+
# Validate Input
|
138 |
+
if not groq_api_key or not generic_url.strip():
|
139 |
+
st.error("Please provide a valid API key and URL.")
|
140 |
+
elif not validators.url(generic_url):
|
141 |
+
st.error("Please enter a valid URL (YouTube or a website).")
|
142 |
+
else:
|
143 |
+
try:
|
144 |
+
with st.spinner("Fetching content and summarizing..."):
|
145 |
+
# Load data from YouTube or Website
|
146 |
+
if "youtube.com" in generic_url or "youtu.be" in generic_url:
|
147 |
+
docs = get_youtube_content(generic_url)
|
148 |
+
else:
|
149 |
+
docs = get_website_content(generic_url)
|
150 |
+
|
151 |
+
if docs is None:
|
152 |
+
st.stop()
|
153 |
+
|
154 |
+
# Create the summary chain and run it
|
155 |
+
chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
|
156 |
+
output_summary = chain.run(docs)
|
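                # Note: newer LangChain releases (0.1+) deprecate Chain.run in
                # favor of invoke; an equivalent call (an untested sketch,
                # assuming the stuff chain's default "output_text" key) would be:
                #   output_summary = chain.invoke({"input_documents": docs})["output_text"]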

                # Display the results
                st.success("Summary Generated Successfully!")

                tab1, tab2 = st.tabs(["Summary", "Raw Content"])

                with tab1:
                    st.write(output_summary)

                with tab2:
                    if docs:
                        st.text_area("Original Content", docs[0].page_content, height=300)

        except Exception as e:
            st.error(f"An error occurred: {str(e)}")
            st.exception(e)
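
To try the app locally (a minimal sketch; it assumes the dependencies from requirements.txt are installed and that .env sits in the working directory so load_dotenv() can pick up GROQ_API_KEY):

    pip install -r requirements.txt
    streamlit run app.py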
requirements.txt
ADDED
Binary file (11.6 kB).
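
The pinned requirements are not viewable as text here. Judging from the imports in app.py, the file would need at least these distributions (an unpinned sketch, not the actual contents):

    streamlit
    validators
    python-dotenv
    langchain
    langchain-groq
    yt-dlp
    requests
    beautifulsoup4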