Spaces:
Sleeping
Sleeping
import gradio as gr | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.llms import HuggingFacePipeline | |
from transformers import pipeline | |
from gtts import gTTS | |
from bs4 import BeautifulSoup | |
import tempfile | |
import os | |
import requests | |
from PIL import Image, ImageDraw, ImageFont | |
import subprocess | |
# CPU-friendly summarization model: device=-1 keeps the transformers
# pipeline on the CPU, so no GPU is required.
summary_pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=-1,
)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# LangChain summarization prompt; ``{text}`` is replaced by the article body
# when the chain runs.
summary_prompt = PromptTemplate.from_template(
    "\n"
    "Summarize the following article content in a clear, concise way:\n"
    "{text}\n"
    "Summary:\n"
)

# Chain that renders the prompt and feeds it to the local model.
summary_chain = LLMChain(prompt=summary_prompt, llm=llm)
def extract_main_content(url):
    """Download *url* and return the text of its longer paragraphs.

    Navigation, header, footer, sidebar, script, style and noscript elements
    are removed first. Of the remaining ``<p>`` elements, only those whose
    text is longer than 60 characters are kept, joined with newlines.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted text with surrounding whitespace stripped (empty when
        no paragraph qualifies), or a message starting with
        ``"Error extracting article content:"`` when the download or the
        parsing fails. This function does not raise.
    """
    try:
        response = requests.get(url, timeout=10)
        # Without this check, 404/500 error pages would be parsed and later
        # summarized as though they were the article itself.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")
        for tag in soup(
            ["nav", "header", "footer", "aside", "script", "style", "noscript"]
        ):
            tag.decompose()

        # Extract each paragraph's text once, then keep only the longer ones
        # (presumably to skip captions, bylines and similar short snippets).
        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        content = "\n".join(text for text in paragraph_texts if len(text) > 60)
        return content.strip()
    except Exception as e:
        # Deliberately broad: callers detect failure through the returned
        # message prefix rather than through an exception.
        return f"Error extracting article content: {e}"
def _wrap_summary_text(summary_text, max_chars=1024, line_width=120):
    """Clip *summary_text* to *max_chars* and wrap it to *line_width* columns."""
    import textwrap

    clipped = summary_text[:max_chars]
    if len(summary_text) > max_chars:
        clipped += "..."
    # Wrap each existing line separately so newlines already present in the
    # summary are preserved.
    return "\n".join(
        textwrap.fill(line, width=line_width) for line in clipped.splitlines()
    )


def create_text_image(summary_text, image_path):
    """Render *summary_text* as white text on a black 1280x720 image.

    The text is cut to 1024 characters (with ``...`` appended when something
    was cut) and wrapped into lines. Previously it was drawn as one long
    line, so most summaries ran off the right edge of the canvas.

    Args:
        summary_text: Text to draw.
        image_path: Destination file; the format follows its extension.
    """
    img = Image.new("RGB", (1280, 720), color=(0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()
    # NOTE(review): 120 characters per line is a conservative guess for the
    # default font on a 1280px-wide canvas with a 50px margin; confirm against
    # the Pillow version in use.
    wrapped = _wrap_summary_text(summary_text, max_chars=1024, line_width=120)
    draw.text((50, 50), wrapped, fill=(255, 255, 255), font=font)
    img.save(image_path)
def generate_video(image_path, audio_path, output_path):
    """Combine a still image and an audio track into an H.264/AAC video.

    Runs ``ffmpeg`` with the image looped as the video stream and stops at
    the end of the shorter input (``-shortest``). An existing *output_path*
    is overwritten (``-y``).

    Args:
        image_path: Still image used for every frame.
        audio_path: Audio file used as the soundtrack.
        output_path: Where the resulting video is written.

    Raises:
        RuntimeError: If ``ffmpeg`` is not installed or exits with a non-zero
            status. Previously such failures were silent and left an empty
            or corrupt output file behind.
    """
    cmd = [
        "ffmpeg", "-y",
        "-loop", "1",
        "-i", image_path,
        "-i", audio_path,
        "-c:v", "libx264",
        "-tune", "stillimage",
        "-c:a", "aac",
        "-b:a", "192k",
        "-pix_fmt", "yuv420p",
        "-shortest",
        output_path,
    ]
    try:
        # stderr is captured (not discarded) so a failure can be reported.
        subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
        )
    except FileNotFoundError as exc:
        raise RuntimeError("ffmpeg executable not found on PATH") from exc
    except subprocess.CalledProcessError as exc:
        detail = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
        # Only the tail of ffmpeg's output is kept; the actual error is
        # normally in the last lines.
        raise RuntimeError(
            f"ffmpeg exited with code {exc.returncode}: {detail[-500:]}"
        ) from exc
def _new_temp_path(suffix):
    """Create an empty temp file ending in *suffix* and return its path."""
    # The handle is closed right away so other writers (gTTS, PIL, ffmpeg)
    # can open the path, and so no file descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _remove_quietly(path):
    """Delete *path* when it is set; ignore files that cannot be removed."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def url_to_av_summary(url):
    """Summarize the article at *url* and build a narrated video of it.

    Steps: extract the article text, summarize it, synthesize speech for the
    summary, render the summary onto an image, then combine image and audio
    into a video.

    Args:
        url: Address of the article to summarize.

    Returns:
        ``(summary, video_path)`` on success, or ``(error_message, None)``
        when any step fails. This function does not raise.
    """
    if not url or not url.strip():
        return "Error: please provide an article URL.", None

    audio_path = image_path = video_path = None
    succeeded = False
    try:
        article_text = extract_main_content(url)
        # Match the extractor's full error prefix so an article that merely
        # begins with the word "Error" is not mistaken for a failure.
        if article_text.startswith("Error extracting article content:"):
            return article_text, None
        if not article_text.strip():
            return "Error: no article text could be extracted from this URL.", None

        summary = summary_chain.run(text=article_text)

        audio_path = _new_temp_path(".mp3")
        gTTS(text=summary).save(audio_path)

        image_path = _new_temp_path(".png")
        video_path = _new_temp_path(".mp4")
        create_text_image(summary, image_path)
        generate_video(image_path, audio_path, video_path)

        succeeded = True
        return summary, video_path
    except Exception as e:
        # UI boundary: report the problem in the summary box instead of
        # letting the request crash.
        return f"Error: {str(e)}", None
    finally:
        # The audio and image are only intermediates. The video is kept on
        # success because its path is returned to the caller.
        _remove_quietly(audio_path)
        _remove_quietly(image_path)
        if not succeeded:
            _remove_quietly(video_path)
# Gradio front end: one URL textbox in, the summary text and the narrated
# video out.
iface = gr.Interface(
    title="URL to AV Summary Agent (No MoviePy)",
    description="Summarizes only article content from a URL and creates a narrated video using ffmpeg + PIL.",
    fn=url_to_av_summary,
    inputs=gr.Textbox(
        placeholder="Paste a news/blog URL here...",
        label="Article URL",
    ),
    outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Summary")],
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()