"""Gradio app that turns an article URL into a short narrated video summary.

Pipeline: fetch the page, keep the longer paragraphs, summarise them with a
CPU-only FLAN-T5 model, synthesise speech with gTTS, and let ffmpeg render a
gradient background with the summary scrolling over it.
"""

import concurrent.futures
import itertools
import os
import subprocess
import tempfile
import textwrap

import gradio as gr
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from PIL import Image, ImageDraw
from transformers import pipeline

FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
VIDEO_SECONDS = 5          # length of the rendered clip
MAX_INPUT_CHARS = 2000     # article text handed to the summariser
MAX_SUMMARY_CHARS = 250    # hard cap on the spoken / displayed summary
CAPTION_WRAP_COLS = 38     # keeps 48px bold text inside a 1280px-wide frame
FFMPEG_TIMEOUT_SECS = 45   # stay below the 60s budget used by safe_summary

# CPU-friendly summarization model
summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt for <50-word promotional summary
summary_prompt = PromptTemplate.from_template("""
Provide a crisp, promotional-style summary (under 50 words) of the following:

{text}

Summary:
""")
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)


# Extract main article content (first 20 meaningful paragraphs)
def extract_main_content(url):
    """Fetch *url* and return the text of its first 20 long paragraphs.

    Boilerplate containers (nav, header, footer, aside, script, style,
    noscript) are removed before paragraphs are collected, and only
    paragraphs longer than 60 characters are kept.

    Returns:
        The paragraphs joined by newlines, or ``None`` when the request
        fails, the server answers with an HTTP error status, or no
        qualifying paragraph is found.
    """
    try:
        resp = requests.get(url, timeout=10)
        # Do not summarise 4xx/5xx error pages as if they were the article.
        resp.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(resp.content, "html.parser")
    for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
        tag.decompose()

    texts = (p.get_text() for p in soup.find_all("p"))
    long_paras = (t for t in texts if len(t) > 60)
    content = "\n".join(itertools.islice(long_paras, 20))  # limit to top 20 paragraphs
    return content or None


# Create gradient background image
def create_background(image_path, size=(1280, 720)):
    """Save a vertical dark-blue gradient of *size* (width, height) to *image_path*."""
    width, height = size
    img = Image.new("RGB", size)
    draw = ImageDraw.Draw(img)
    for row in range(height):
        frac = row / height
        colour = (int(10 + frac * 20), int(20 + frac * 30), int(50 + frac * 50))
        draw.line([(0, row), (width, row)], fill=colour)
    img.save(image_path)


def _temp_path(suffix):
    """Create an empty temporary file and return its path with the handle closed."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def _escape_filter_path(path):
    """Make a file path safe to embed, single-quoted, in an ffmpeg filter option.

    Backslashes become forward slashes and ``:`` (the option separator) is
    backslash-escaped. Paths containing a single quote are not supported.
    """
    return path.replace("\\", "/").replace(":", "\\:")


def _render_video(bg_path, audio_path, caption_path, video_path):
    """Run ffmpeg to combine background, narration and caption into *video_path*.

    The caption is read from *caption_path* via ``textfile=`` with
    ``expansion=none`` so that quotes, colons, ``%`` and backslashes in the
    summary cannot alter the filtergraph.

    Returns:
        ``True`` when ffmpeg exits with status 0, ``False`` when it fails,
        times out, or cannot be started.
    """
    drawtext = (
        f"drawtext=fontfile={FONT_PATH}"
        f":textfile='{_escape_filter_path(caption_path)}'"
        ":expansion=none"
        ":fontcolor=white:fontsize=48:box=1:boxcolor=black@0.5:boxborderw=5:"
        f"x=(w-text_w)/2:y=h-(t*(h+text_h)/{VIDEO_SECONDS})"
    )
    cmd = [
        'ffmpeg', '-y',
        '-loop', '1', '-i', bg_path,
        '-i', audio_path,
        '-vf', drawtext,
        '-t', str(VIDEO_SECONDS),
        '-c:v', 'libx264', '-c:a', 'aac',
        '-pix_fmt', 'yuv420p',
        '-shortest',
        video_path,
    ]
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=FFMPEG_TIMEOUT_SECS,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing ffmpeg binary.
        return False
    return result.returncode == 0


# Generate 5s AV summary
def url_to_av_summary(url):
    """Build a narrated summary video for the article at *url*.

    Returns:
        A ``(message, video_path)`` tuple. On success ``message`` is the
        summary and ``video_path`` points at an ``.mp4`` file. When the
        article cannot be extracted or the summary is empty, ``message`` is
        an explanation and ``video_path`` is ``None``. When only the video
        rendering fails, the summary is still returned with ``None``.

    Raises:
        Whatever the summarisation chain or gTTS raise (for example on a
        text-to-speech network failure); intermediate files are cleaned up
        first.
    """
    text = extract_main_content(url)
    if not text:
        return "Failed to extract article content.", None

    raw_summary = summary_chain.run(text=text[:MAX_INPUT_CHARS])  # truncate
    summary = raw_summary.replace('"', '')[:MAX_SUMMARY_CHARS]  # short summary
    if not summary.strip():
        # gTTS refuses empty text, so stop before creating any files.
        return "Failed to generate a summary.", None

    audio_path = _temp_path(".mp3")
    bg_path = _temp_path(".png")
    caption_path = _temp_path(".txt")
    video_path = _temp_path(".mp4")
    rendered = False
    try:
        gTTS(text=summary).save(audio_path)
        create_background(bg_path)
        with open(caption_path, "w", encoding="utf-8") as caption_file:
            # Wrap so the caption fits the frame instead of running off-screen.
            caption_file.write(textwrap.fill(summary, width=CAPTION_WRAP_COLS))
        rendered = _render_video(bg_path, audio_path, caption_path, video_path)
    finally:
        # Only the final video is needed by the caller.
        for path in (audio_path, bg_path, caption_path):
            _remove_quietly(path)
        if not rendered:
            _remove_quietly(video_path)

    if not rendered:
        return summary, None
    return summary, video_path


# Timeout wrapper: 60s max
def safe_summary(url, timeout_secs=60):
    """Run :func:`url_to_av_summary` and give up waiting after *timeout_secs*.

    The executor is shut down with ``wait=False``: leaving a ``with`` block
    would wait for the worker and make the timeout ineffective. A timed-out
    job keeps running in its background thread until it finishes on its own.

    Returns:
        The ``(message, video_path)`` tuple from ``url_to_av_summary``, or
        a timeout message with ``None`` when the deadline passes.
    """
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(url_to_av_summary, url)
    try:
        return future.result(timeout=timeout_secs)
    except concurrent.futures.TimeoutError:
        return "⏱️ Processing timed out.", None
    finally:
        executor.shutdown(wait=False)


iface = gr.Interface(
    fn=safe_summary,
    inputs=gr.Textbox(label="Article URL"),
    outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Preview")],
    title="🎥 5-Second AV Summary (CPU-only)",
    description="Fast, CPU-only AV summary of a URL. Video capped at 5 seconds to prevent timeouts."
)

if __name__ == '__main__':
    iface.launch()