Spaces:

csccorner
/

URL-to-Audio-Summary

Running

File size: 2,405 Bytes

d9efe10
 
 
 
 
 
 
f4064e9
d9efe10
 
e9d5607
 
d9efe10
f4064e9
d9efe10
 
 
f4064e9
d9efe10
e9d5607
d9efe10
 
 
 
 
 
 
 
e9d5607
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9efe10
 
e9d5607
 
 
f4064e9
e9d5607
d9efe10
e9d5607
f4064e9
 
 
d9efe10
f4064e9
d9efe10
 
 
 
 
 
 
 
 
 
 
f4064e9
e9d5607
d9efe10

import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
from bs4 import BeautifulSoup
import requests

# Summarization backend: a FLAN-T5 text2text pipeline pinned to CPU
# (device=-1), wrapped so LangChain can drive it as an LLM.
SUMMARY_MODEL_ID = "google/flan-t5-base"
CPU_DEVICE = -1

summary_pipe = pipeline(
    task="text2text-generation",
    model=SUMMARY_MODEL_ID,
    device=CPU_DEVICE,
)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt wrapped around the article text; ``{text}`` is its only placeholder.
SUMMARY_TEMPLATE = """
Summarize the following article content in a clear, concise way:

{text}

Summary:
"""
summary_prompt = PromptTemplate.from_template(SUMMARY_TEMPLATE)

# Chain used by the request handler: fills the prompt, then calls the LLM.
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)

def extract_main_content(url):
    """Fetch *url* and return the article-like paragraph text of the page.

    The page is parsed with BeautifulSoup, page-chrome elements (navigation,
    header, footer, sidebars, scripts, styles) are removed, and the text of
    every ``<p>`` element longer than 60 characters is joined with newlines.

    Args:
        url: Address of the page to download. Surrounding whitespace is
            ignored.

    Returns:
        The extracted text with surrounding whitespace stripped (possibly
        an empty string when no paragraph qualifies). On any failure the
        function does not raise; it returns a message starting with
        ``"Error extracting article content: "`` instead.
    """
    error_prefix = "Error extracting article content: "

    # Fail fast with a readable message rather than a low-level
    # "missing scheme" error from the HTTP client.
    if not isinstance(url, str) or not url.strip():
        return f"{error_prefix}no URL provided"

    try:
        response = requests.get(url.strip(), timeout=10)
        # Without this check, 4xx/5xx error pages would be parsed and
        # summarized as if they were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Remove navigation, header, footer, sidebars, and scripts
        for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
            tag.decompose()

        # Keep only paragraphs with significant text; short ones are
        # typically captions, bylines, or cookie notices.
        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        content = "\n".join(text for text in paragraph_texts if len(text) > 60)
        return content.strip()
    except Exception as e:
        # Callers detect failure via the message prefix, so every error
        # (network, HTTP status, parsing) is reported as a string.
        return f"{error_prefix}{e}"

def url_to_audio_summary(url):
    """Summarize the article at *url* and synthesize the summary as speech.

    Args:
        url: Address of the article to summarize.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the summary
        and ``audio_path`` is the path of a temporary ``.mp3`` file holding
        the spoken summary. On failure ``text`` is a message starting with
        ``"Error"`` and ``audio_path`` is ``None``. The function never
        raises.
    """
    if not isinstance(url, str) or not url.strip():
        return "Error: please provide an article URL.", None

    try:
        article_text = extract_main_content(url.strip())
        # Match the extractor's exact error prefix; a bare "Error" check
        # would misclassify articles whose text happens to begin with it.
        if article_text.startswith("Error extracting article content:"):
            return article_text, None
        if not article_text.strip():
            return "Error: no article text could be extracted from this URL.", None

        summary = summary_chain.run(text=article_text)
        if not summary or not summary.strip():
            # gTTS rejects empty text; report this clearly instead.
            return "Error: the model returned an empty summary.", None

        # Reserve a temp path and close the handle before gTTS writes to it:
        # an open handle leaks a descriptor and blocks re-opening on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
            audio_path = audio_file.name

        try:
            # Use gTTS for TTS
            gTTS(text=summary).save(audio_path)
        except Exception:
            # Do not leave an orphan temp file behind when synthesis fails.
            os.remove(audio_path)
            raise

        return summary, audio_path

    except Exception as e:
        return f"Error: {e}", None

# Gradio UI: one textbox for the article URL in; summary text and its
# spoken version out.
_url_input = gr.Textbox(label="Article URL", placeholder="Paste a news/blog URL here...")
_summary_output = gr.Textbox(label="Summary")
_audio_output = gr.Audio(label="Audio Summary")

iface = gr.Interface(
    fn=url_to_audio_summary,
    inputs=_url_input,
    outputs=[_summary_output, _audio_output],
    title="URL to Audio Summary Agent",
    description="Summarizes only the article content from a URL and gives an audio summary. CPU-only using gTTS.",
)

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()