# Spaces: Running
| import gradio as gr | |
| from langchain.chains import LLMChain | |
| from langchain.prompts import PromptTemplate | |
| from langchain.document_loaders import WebBaseLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.llms import HuggingFacePipeline | |
| from transformers import pipeline | |
| from gtts import gTTS | |
| import tempfile | |
| import os | |
# Summarizer configuration. device=-1 asks transformers to run on the CPU.
_SUMMARY_TASK = "text2text-generation"
_SUMMARY_MODEL = "google/flan-t5-base"

# Prompt text sent to the model; {text} is filled with the page content.
_SUMMARY_TEMPLATE = """
Summarize the following webpage content in a clear, concise way:
{text}
Summary:
"""

# Build the Hugging Face pipeline once at import time and wrap it so that
# LangChain can drive it as an LLM.
summary_pipe = pipeline(
    task=_SUMMARY_TASK,
    model=_SUMMARY_MODEL,
    device=-1,
)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt + model glued together into a single callable chain.
summary_prompt = PromptTemplate.from_template(_SUMMARY_TEMPLATE)
summary_chain = LLMChain(prompt=summary_prompt, llm=llm)
def _summarize_chunks(chunks):
    """Summarize each text chunk separately, then condense the partial results.

    Args:
        chunks: Iterable of strings, each small enough to be summarized alone.

    Returns:
        The final summary string, or "" when no chunk produced any text.
    """
    partial_summaries = []
    for chunk in chunks:
        if not chunk.strip():
            continue  # nothing to summarize in a whitespace-only chunk
        partial = summary_chain.run(text=chunk).strip()
        if partial:
            partial_summaries.append(partial)

    # Zero or one partial summary needs no second pass.
    if len(partial_summaries) <= 1:
        return "".join(partial_summaries)

    # Several partial summaries: condense them into a single summary.
    return summary_chain.run(text="\n".join(partial_summaries)).strip()


def _save_speech(text):
    """Synthesize *text* with gTTS into a new temporary MP3 file.

    Args:
        text: The text to be spoken.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        Exception: Whatever gTTS raises; the temporary file is removed first.
    """
    # Reserve a unique path, then close the handle before gTTS writes to it.
    # Leaving the handle open leaks a file descriptor and can prevent the
    # file from being reopened for writing on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
        audio_path = audio_file.name

    try:
        gTTS(text=text).save(audio_path)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path


def url_to_audio_summary(url):
    """Fetch a web page, summarize it, and render the summary as speech.

    Args:
        url: Address of the page to summarize.

    Returns:
        A ``(summary, audio_path)`` tuple on success. On any failure the
        tuple is ``("Error: <message>", None)`` instead of raising, so the
        caller always receives two values.
    """
    url = (url or "").strip()
    if not url:
        return "Error: Please provide a URL.", None

    try:
        docs = WebBaseLoader(url).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
        chunks = [piece.page_content for piece in splitter.split_documents(docs)]

        # Summarize chunk by chunk. Joining every chunk back together would
        # hand the model the entire page in one call and make the split
        # pointless.
        summary = _summarize_chunks(chunks)
        if not summary:
            return "Error: No summary could be generated for this page.", None

        return summary, _save_speech(summary)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return f"Error: {str(e)}", None
# UI components: one text box in, a text summary and an audio player out.
_url_box = gr.Textbox(
    label="Article URL",
    placeholder="Paste a news/blog URL here...",
)
_result_widgets = [
    gr.Textbox(label="Summary"),
    gr.Audio(label="Audio Summary"),
]

# Wire the summarizer function to the components above.
iface = gr.Interface(
    fn=url_to_audio_summary,
    inputs=_url_box,
    outputs=_result_widgets,
    title="URL to Audio Summary Agent",
    description="Summarizes article from a URL and gives an audio summary. CPU-only using gTTS.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()