# Spaces:
# Sleeping
# Sleeping
# File size: 3,508 Bytes
# 7dd982b 76e3793 b82995c f06c20a 7dd982b f06c20a 7dd982b f06c20a 7dd982b 2b8e4f0 7dd982b f06c20a 2b8e4f0 f06c20a 76e3793 f06c20a 76e3793 2b8e4f0 f06c20a 2b8e4f0 f06c20a 2b8e4f0 f06c20a 2b8e4f0 f06c20a 76e3793 f06c20a 2b8e4f0 f06c20a 2b8e4f0 f06c20a 76e3793 f06c20a 76e3793 2b8e4f0 b82995c f06c20a b82995c 7dd982b f06c20a 2b8e4f0 7dd982b f06c20a 2b8e4f0 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from gtts import gTTS
from bs4 import BeautifulSoup
import tempfile
import requests
import subprocess
import concurrent.futures
from PIL import Image, ImageDraw
# Summarization backend: a FLAN-T5 (base) text2text pipeline, wrapped so that
# LangChain can drive it. ``device=-1`` keeps inference on the CPU.
summary_pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=-1,
)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt requesting a promotional summary of fewer than 50 words. ``{text}``
# is the single template variable filled in by the chain.
_SUMMARY_TEMPLATE = (
    "\n"
    "Provide a crisp, promotional-style summary (under 50 words) of the following:\n"
    "{text}\n"
    "Summary:\n"
)
summary_prompt = PromptTemplate.from_template(_SUMMARY_TEMPLATE)

# Chain that formats the prompt and runs it through the local model.
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
# Extract main article content (first 20 meaningful paragraphs)
def extract_main_content(url, max_paragraphs=20, min_chars=60):
    """Fetch *url* and return the text of its leading article paragraphs.

    Page chrome (``nav``, ``header``, ``footer``, ``aside``) and non-content
    tags (``script``, ``style``, ``noscript``) are removed before ``<p>``
    elements are collected.

    Args:
        url: Address of the page to download. Surrounding whitespace is
            ignored.
        max_paragraphs: Maximum number of paragraphs to keep.
        min_chars: A paragraph is kept only if its stripped text is longer
            than this many characters.

    Returns:
        The kept paragraphs joined with newlines, or ``None`` when the URL is
        empty, the request fails (connection error, timeout, invalid URL or
        an HTTP error status), or no paragraph qualifies.
    """
    if not url or not url.strip():
        return None

    try:
        resp = requests.get(url.strip(), timeout=10)
        # Treat 4xx/5xx responses as failures instead of summarizing an
        # error page.
        resp.raise_for_status()
    except requests.RequestException:
        # Callers treat a falsy result as "could not extract content".
        return None

    soup = BeautifulSoup(resp.content, "html.parser")
    for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
        tag.decompose()

    # Strip once per paragraph so padding whitespace neither counts towards
    # the length threshold nor ends up in the output.
    texts = (p.get_text().strip() for p in soup.find_all("p"))
    paragraphs = [t for t in texts if len(t) > min_chars][:max_paragraphs]

    content = "\n".join(paragraphs)
    return content or None
# Create gradient background image
def create_background(image_path, size=(1280,720)):
    """Render a vertical gradient and write it to *image_path*.

    Each pixel row is drawn as one horizontal line. The colour moves linearly
    from RGB (10, 20, 50) at the top towards (30, 50, 100) at the bottom.

    Args:
        image_path: Destination passed to ``Image.save``.
        size: ``(width, height)`` of the image in pixels.
    """
    width, height = size[0], size[1]
    canvas = Image.new("RGB", size)
    pen = ImageDraw.Draw(canvas)

    for row in range(height):
        # Fraction of the way down the image, in [0, 1).
        frac = row / height
        colour = (
            int(10 + frac * 20),
            int(20 + frac * 30),
            int(50 + frac * 50),
        )
        pen.line([(0, row), (width, row)], fill=colour)

    canvas.save(image_path)
# Generate 5s AV summary
def _render_summary_video(summary, video_path, duration_secs=5, timeout_secs=45):
    """Render narration plus a scrolling caption for *summary* into *video_path*.

    Intermediate files (speech audio, background image, caption text) live in
    a temporary directory that is removed when rendering ends.

    Args:
        summary: Non-empty text to narrate and display.
        video_path: Absolute path of the MP4 file to (over)write.
        duration_secs: Length of the clip and of the caption scroll.
        timeout_secs: Upper bound on the ffmpeg run time.

    Returns:
        ``True`` if ffmpeg exited successfully and produced a non-empty file,
        ``False`` if it could not be started, timed out, or failed.
    """
    import os
    import textwrap

    with tempfile.TemporaryDirectory() as workdir:
        audio_path = os.path.join(workdir, "audio.mp3")
        bg_path = os.path.join(workdir, "background.png")
        caption_path = os.path.join(workdir, "caption.txt")

        gTTS(text=summary).save(audio_path)
        create_background(bg_path)

        # The caption goes through a file rather than the filter string:
        # quotes, colons, backslashes or percent signs in the summary would
        # otherwise be parsed as filtergraph syntax. Wrapping keeps a long
        # summary from being drawn as one line far wider than the frame
        # (the width is a heuristic).
        with open(caption_path, "w", encoding="utf-8") as fh:
            fh.write(textwrap.fill(summary, width=40))

        # ffmpeg runs with cwd=workdir, so the caption is referenced by its
        # bare file name and needs no filtergraph escaping.
        # NOTE(review): the original literal read "[email protected]"
        # (e-mail obfuscation damage); restored as "boxcolor=black@0.5",
        # presumably the intended value -- confirm.
        drawtext = (
            "drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:"
            "textfile=caption.txt:expansion=none:"
            "fontcolor=white:fontsize=48:box=1:boxcolor=black@0.5:boxborderw=5:"
            f"x=(w-text_w)/2:y=h-(t*(h+text_h)/{duration_secs})"
        )
        cmd = [
            'ffmpeg', '-y',
            '-loop', '1', '-i', 'background.png',
            '-i', 'audio.mp3',
            '-vf', drawtext,
            '-t', str(duration_secs),
            '-c:v', 'libx264', '-c:a', 'aac', '-pix_fmt', 'yuv420p',
            '-shortest', video_path,
        ]
        try:
            proc = subprocess.run(
                cmd,
                cwd=workdir,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=timeout_secs,
            )
        except (OSError, subprocess.TimeoutExpired):
            # ffmpeg is missing or not executable, or it ran too long.
            return False

    return proc.returncode == 0 and os.path.getsize(video_path) > 0


def url_to_av_summary(url):
    """Summarize the article at *url* and render a short narrated video.

    Args:
        url: Address of the article to summarize.

    Returns:
        A ``(message, video_path)`` tuple. ``message`` is the summary, or an
        explanation when extraction or summarization failed. ``video_path``
        is the path of the generated MP4, or ``None`` when no video could be
        produced.
    """
    import os

    text = extract_main_content(url)
    if not text:
        return "Failed to extract article content.", None

    text = text[:2000]  # truncate the model input
    # Drop double quotes and cap the length so the caption stays short.
    summary = summary_chain.run(text=text).replace('"', '')[:250].strip()
    if not summary:
        # Nothing to narrate or draw.
        return "Failed to generate a summary.", None

    # Reserve the output file and close the handle right away; ffmpeg
    # overwrites it (-y). This file must outlive the call so the UI can
    # serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
        video_path = handle.name

    rendered = False
    try:
        rendered = _render_summary_video(summary, video_path)
    finally:
        if not rendered:
            # Do not leave (or hand back) an empty or partial video file.
            try:
                os.remove(video_path)
            except OSError:
                pass

    return summary, (video_path if rendered else None)
# Timeout wrapper: 60s max
def safe_summary(url, timeout_secs=60, task=None):
    """Run the summary job for *url*, giving up after *timeout_secs* seconds.

    The executor is deliberately not used as a context manager: leaving a
    ``with ThreadPoolExecutor()`` block waits for the worker to finish, which
    would make this function block for the full job duration even after the
    timeout fired.

    Args:
        url: Value passed to the job.
        timeout_secs: Maximum number of seconds to wait for the result.
        task: Callable invoked as ``task(url)``. Defaults to
            ``url_to_av_summary``.

    Returns:
        Whatever the job returns, or ``("⏱️ Processing timed out.", None)``
        when it does not finish in time.

    Raises:
        Any exception raised by the job itself is propagated.

    Note:
        A timed-out job is not interrupted; its thread keeps running in the
        background until it completes on its own.
    """
    if task is None:
        task = url_to_av_summary

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(task, url)
    try:
        return future.result(timeout=timeout_secs)
    except concurrent.futures.TimeoutError:
        return "⏱️ Processing timed out.", None
    finally:
        # Release the executor without waiting for a still-running worker.
        executor.shutdown(wait=False)
# Gradio UI: one URL textbox in; the summary text and a video preview out.
# The handler is the timeout-guarded wrapper rather than the raw job.
iface = gr.Interface(
    fn=safe_summary,
    inputs=gr.Textbox(label="Article URL"),
    outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Preview")],
    title="🎥 5-Second AV Summary (CPU-only)",
    description="Fast, CPU-only AV summary of a URL. Video capped at 5 seconds to prevent timeouts.",
)

# Start the web server only when executed as a script, not on import.
# (A stray trailing "|" after the launch call, which made the statement a
# syntax error, has been removed.)
if __name__ == '__main__':
    iface.launch()